From 6da10a49805395d2c29cdde6060a97075564bf1b Mon Sep 17 00:00:00 2001 From: Ahmed Mezghani Date: Mon, 30 Jun 2025 16:39:21 +0200 Subject: [PATCH] Add reproducing test cases --- .../tests/cases/deeply_nested_plan.rs | 104 +++++ datafusion/substrait/tests/cases/mod.rs | 1 + .../test_plans/deeply_nested_tpl.json | 382 ++++++++++++++++++ 3 files changed, 487 insertions(+) create mode 100644 datafusion/substrait/tests/cases/deeply_nested_plan.rs create mode 100644 datafusion/substrait/tests/testdata/test_plans/deeply_nested_tpl.json diff --git a/datafusion/substrait/tests/cases/deeply_nested_plan.rs b/datafusion/substrait/tests/cases/deeply_nested_plan.rs new file mode 100644 index 000000000000..e68e730e2b02 --- /dev/null +++ b/datafusion/substrait/tests/cases/deeply_nested_plan.rs @@ -0,0 +1,104 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Tests for deeply nested plans causing stack overflows
+
+#[cfg(test)]
+mod tests {
+    use crate::utils::test::add_plan_schemas_to_ctx;
+    use datafusion::common::Result;
+    use datafusion::logical_expr::LogicalPlan;
+    use datafusion::prelude::SessionContext;
+    use datafusion_substrait::logical_plan::consumer::from_substrait_plan;
+    use serde_json::{json, Value};
+    use substrait::proto::Plan;
+
+    /// Number of entries the extended `arguments` list holds once the plan is generated
+    /// (the `VALUE_0` entry shipped in the template plus the generated ones).
+    const DEPTH: usize = 3000;
+
+    /// Converts the generated plan to a logical plan and then asks the session state for a
+    /// physical plan; the test passes when both steps return `Ok`.
+    // NOTE(review): presumably ignored because it reproduces the stack overflow named in
+    // the module docs and would take down the test binary -- confirm before un-ignoring.
+    #[tokio::test]
+    #[ignore]
+    async fn test_stack_overflow_planning() -> Result<()> {
+        let (session, logical_plan) = setup().await?;
+        session.state().create_physical_plan(&logical_plan).await?;
+
+        Ok(())
+    }
+
+    /// Generates the `DEPTH`-sized substrait plan, passes a fresh [`SessionContext`] through
+    /// `add_plan_schemas_to_ctx`, and converts the plan into a [`LogicalPlan`].
+    ///
+    /// Returns the session together with the logical plan so the caller can keep planning
+    /// against the same context.
+    async fn setup() -> Result<(SessionContext, LogicalPlan)> {
+        let substrait_plan = generate_deep_plan(DEPTH);
+
+        let session = add_plan_schemas_to_ctx(SessionContext::new(), &substrait_plan)?;
+        let logical_plan = from_substrait_plan(&session.state(), &substrait_plan).await?;
+        Ok((session, logical_plan))
+    }
+
+    /// Builds a substrait [`Plan`] from `deeply_nested_tpl.json` with an enlarged argument list.
+    ///
+    /// The template already contains the `VALUE_0` entry; one extra entry is appended for
+    /// every index in `1..depth`. Generating them here avoids committing a large json file
+    /// to the repo.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the template is not valid json, if the `arguments` array cannot be found,
+    /// or if the extended document does not deserialize into a [`Plan`].
+    fn generate_deep_plan(depth: usize) -> Plan {
+        // JSON pointer to the `arguments` array that gets extended.
+        const ARGUMENTS_POINTER: &str = "/relations/0/root/input/project/input/aggregate/input/filter/condition/scalarFunction/arguments/2/value/scalarFunction/arguments";
+
+        let mut root: Value = serde_json::from_str(include_str!(
+            "../testdata/test_plans/deeply_nested_tpl.json"
+        ))
+        .expect("failed to parse json");
+
+        let arguments = root
+            .pointer_mut(ARGUMENTS_POINTER)
+            .and_then(Value::as_array_mut)
+            .expect("couldn't find the arguments array");
+
+        // Numbering starts at 1 because `VALUE_0` is part of the template.
+        arguments.extend((1..depth).map(comparison_argument));
+
+        serde_json::from_value(root).expect("failed to deserialize from value")
+    }
+
+    /// Returns one entry for the extended `arguments` array: a scalar function call with
+    /// function reference 2 (declared as `not_equal:any_any` in the template) applied to
+    /// struct field 2 and the string literal `VALUE_{index}`.
+    fn comparison_argument(index: usize) -> Value {
+        json!({
+            "value": {
+                "scalarFunction": {
+                    "functionReference": 2,
+                    "outputType": {
+                        "bool": {
+                            "nullability": "NULLABILITY_NULLABLE"
+                        }
+                    },
+                    "arguments": [
+                        {
+                            "value": {
+                                "selection": {
+                                    "directReference": {
+                                        "structField": {
+                                            "field": 2
+                                        }
+                                    },
+                                    "rootReference": {}
+                                }
+                            }
+                        },
+                        {
+                            "value": {
+                                "literal": {
+                                    "string": format!("VALUE_{index}")
+                                }
+                            }
+                        }
+                    ]
+                }
+            }
+        })
+    }
+}
diff --git a/datafusion/substrait/tests/cases/mod.rs b/datafusion/substrait/tests/cases/mod.rs
index 777246e4139b..18c854bb00c6 100644
--- a/datafusion/substrait/tests/cases/mod.rs
+++ b/datafusion/substrait/tests/cases/mod.rs
@@ -16,6 +16,7 @@
 // under the License.
mod consumer_integration; +mod deeply_nested_plan; mod emit_kind_tests; mod function_test; mod logical_plans; diff --git a/datafusion/substrait/tests/testdata/test_plans/deeply_nested_tpl.json b/datafusion/substrait/tests/testdata/test_plans/deeply_nested_tpl.json new file mode 100644 index 000000000000..cb8fd32df9e8 --- /dev/null +++ b/datafusion/substrait/tests/testdata/test_plans/deeply_nested_tpl.json @@ -0,0 +1,382 @@ +{ + "extensionUris": [ + { + "extensionUriAnchor": 4, + "uri": "/functions_aggregate_generic.yaml" + }, + { + "extensionUriAnchor": 5, + "uri": "/functions_arithmetic.yaml" + }, + { + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, + { + "extensionUriAnchor": 2, + "uri": "/functions_string.yaml" + }, + { + "extensionUriAnchor": 3, + "uri": "/functions_comparison.yaml" + } + ], + "extensions": [ + { + "extensionFunction": { + "extensionUriReference": 1, + "name": "and:bool" + } + }, + { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "like:str_str" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "not_equal:any_any" + } + }, + { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 3, + "name": "count:" + } + }, + { + "extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 4, + "name": "sum:fp64" + } + }, + { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 5, + "name": "coalesce:any" + } + } + ], + "relations": [ + { + "root": { + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 2, + 3 + ] + } + }, + "input": { + "aggregate": { + "common": { + "direct": {} + }, + "input": { + "filter": { + "common": { + "direct": {} + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "size", + "name", + "id" + ], + "struct": { + "types": [ + { + "fp64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "string": { + 
"nullability": "NULLABILITY_NULLABLE" + } + }, + { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_REQUIRED" + } + }, + "virtualTable": { + "values": [ + { + "fields": [ + { + "fp64": 0.1, + "nullable": true + }, + { + "string": "field_1", + "nullable": true + }, + { + "string": "field_2", + "nullable": true + } + ] + }, + { + "fields": [ + { + "fp64": 0.1, + "nullable": true + }, + { + "string": "field_1", + "nullable": true + }, + { + "string": "field_2", + "nullable": true + } + ] + } + ] + } + } + }, + "condition": { + "scalarFunction": { + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 1, + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "literal": { + "string": "%field_1%" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 1, + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "literal": { + "string": "%field_2%" + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 2, + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "literal": { + "string": "VALUE_0" + } + } + } + ] + } + } + } + ] + } + } + } + ] + } + } + } + }, 
+ "groupings": [ + {} + ], + "measures": [ + { + "measure": { + "functionReference": 3, + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "nullability": "NULLABILITY_REQUIRED" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL" + } + }, + { + "measure": { + "functionReference": 4, + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "fp64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + } + } + ] + } + } + ] + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": {} + }, + "rootReference": {} + } + }, + { + "scalarFunction": { + "functionReference": 5, + "outputType": { + "fp64": { + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": {} + } + } + }, + { + "value": { + "literal": { + "fp64": 0 + } + } + } + ] + } + } + ] + } + }, + "names": [ + "count", + "size" + ] + } + } + ], + "version": { + "minorNumber": 1, + "producer": "producer" + } +} \ No newline at end of file