From 8f2642239f28569ef91a7525fca64f02925507a6 Mon Sep 17 00:00:00 2001
From: Sebastian Bernauer
Date: Thu, 31 Jul 2025 12:44:31 +0200
Subject: [PATCH 1/3] fix: Add startupProbe to prevent Superset startup problems

---
 CHANGELOG.md                                  |  5 +++
 .../src/superset_controller.rs                | 50 ++++++++++++-------
 2 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1a1c6edb..6c892534 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,11 @@
 
 ## [Unreleased]
 
+### Fixed
+
+- Fix container not starting because Superset was starting too slow and was killed because a failing liveness probe.
+We now add a proper startup probe, which allows Superset to take longer to start up ([#XXX]).
+
 ## [25.7.0] - 2025-07-23
 
 ## [25.7.0-rc1] - 2025-07-18
diff --git a/rust/operator-binary/src/superset_controller.rs b/rust/operator-binary/src/superset_controller.rs
index 8e58aa8a..0da28232 100644
--- a/rust/operator-binary/src/superset_controller.rs
+++ b/rust/operator-binary/src/superset_controller.rs
@@ -786,21 +786,7 @@ fn build_server_rolegroup_statefulset(
                 create_vector_shutdown_file_command(STACKABLE_LOG_DIR),
         }])
         .resources(merged_config.resources.clone().into());
-    let probe = Probe {
-        http_get: Some(HTTPGetAction {
-            port: IntOrString::Int(APP_PORT.into()),
-            path: Some("/health".to_string()),
-            ..HTTPGetAction::default()
-        }),
-        initial_delay_seconds: Some(15),
-        period_seconds: Some(15),
-        timeout_seconds: Some(1),
-        failure_threshold: Some(3),
-        success_threshold: Some(1),
-        ..Probe::default()
-    };
-    superset_cb.readiness_probe(probe.clone());
-    superset_cb.liveness_probe(probe);
+    add_superset_container_probes(&mut superset_cb);
 
     // listener endpoints will use persistent volumes
     // so that load balancers can hard-code the target addresses and
@@ -932,6 +918,40 @@ fn build_server_rolegroup_statefulset(
     })
 }
 
+/// Adds the startup, readiness and liveness probes to the Superset container.
+///
+/// All three probes share the same HTTP GET on `/health` (port `APP_PORT`), checked every
+/// 5 seconds with a 5 second timeout; they differ only in their failure threshold.
+fn add_superset_container_probes(superset_cb: &mut ContainerBuilder) {
+    let probe_action = HTTPGetAction {
+        port: IntOrString::Int(APP_PORT.into()),
+        path: Some("/health".to_string()),
+        ..HTTPGetAction::default()
+    };
+    let common_probe = Probe {
+        http_get: Some(probe_action),
+        period_seconds: Some(5),
+        timeout_seconds: Some(5),
+        success_threshold: Some(1),
+        ..Probe::default()
+    };
+    // Allow up to 10 minutes for startup: 10 * 60 seconds / 5 second period = 120 attempts
+    superset_cb.startup_probe(Probe {
+        failure_threshold: Some(10 /* minutes */ * 60 / 5),
+        ..common_probe.clone()
+    });
+    // Readiness: remove it from the Service immediately, after a single failed check
+    superset_cb.readiness_probe(Probe {
+        failure_threshold: Some(1),
+        ..common_probe.clone()
+    });
+    // Liveness: only restart the container after 3 failed checks
+    superset_cb.liveness_probe(Probe {
+        failure_threshold: Some(3),
+        ..common_probe
+    });
+}
+
 fn add_authentication_volumes_and_volume_mounts(
     auth_config: &SupersetClientAuthenticationDetailsResolved,
     cb: &mut ContainerBuilder,

From 91e4392f9e19c6c946210d45df1e7781a6352f42 Mon Sep 17 00:00:00 2001
From: Sebastian Bernauer
Date: Thu, 31 Jul 2025 12:47:08 +0200
Subject: [PATCH 2/3] changelog

---
 CHANGELOG.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6c892534..4dcded76 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,9 @@
 ### Fixed
 
 - Fix container not starting because Superset was starting too slow and was killed because a failing liveness probe.
-We now add a proper startup probe, which allows Superset to take longer to start up ([#XXX]).
+We now add a proper startup probe, which allows Superset to take longer to start up ([#654]).
+
+[#654]: https://github.com/stackabletech/superset-operator/pull/654
 
 ## [25.7.0] - 2025-07-23
 

From 8edc0a11e80ffd7b990a654e209d1dff7ce1cdad Mon Sep 17 00:00:00 2001
From: Sebastian Bernauer
Date: Mon, 4 Aug 2025 09:58:51 +0200
Subject: [PATCH 3/3] Update CHANGELOG.md

Co-authored-by: Nick <10092581+NickLarsenNZ@users.noreply.github.com>
---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4dcded76..7bfb0aec 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,7 @@
 ### Fixed
 
 - Fix container not starting because Superset was starting too slow and was killed because a failing liveness probe.
-We now add a proper startup probe, which allows Superset to take longer to start up ([#654]).
+  We now add a proper startup probe, which allows Superset to take longer to start up ([#654]).
 
 [#654]: https://github.com/stackabletech/superset-operator/pull/654
 