From 5a1f4f9392285095bbd7816b226cf674d4ca1125 Mon Sep 17 00:00:00 2001 From: "Gavin.Zhu" Date: Thu, 18 Sep 2025 09:54:01 +0000 Subject: [PATCH 1/2] fix: fixed Planner division by zero errors in sglang backend Signed-off-by: Gavin.Zhu --- .../src/dynamo/planner/utils/planner_core.py | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/components/planner/src/dynamo/planner/utils/planner_core.py b/components/planner/src/dynamo/planner/utils/planner_core.py index 428f0c3e10..d6690f3b5c 100644 --- a/components/planner/src/dynamo/planner/utils/planner_core.py +++ b/components/planner/src/dynamo/planner/utils/planner_core.py @@ -372,16 +372,30 @@ async def make_adjustments(self): expect_ttft = self.prefill_interpolator.interpolate_ttft( self.last_metrics.isl ) - self.p_correction_factor = self.last_metrics.ttft / expect_ttft + if expect_ttft > 0: + self.p_correction_factor = self.last_metrics.ttft / expect_ttft + else: + logger.warning(f"Expected TTFT is {expect_ttft}, using default correction factor 1.0") + self.p_correction_factor = 1.0 # for ITL, we expect the correction factor to be close to 1 + if len(self.d_endpoints) > 0: + concurrency = (self.last_metrics.num_req # type: ignore + / len(self.d_endpoints) + * self.last_metrics.request_duration # type: ignore + / self.args.adjustment_interval) + else: + logger.warning("No decode workers available, using default concurrency of 1.0") + concurrency = 1.0 + expect_itl = self.decode_interpolator.interpolate_itl( - concurrency=self.last_metrics.num_req # type: ignore - / len(self.d_endpoints) - * self.last_metrics.request_duration # type: ignore - / self.args.adjustment_interval, + concurrency=concurrency, context_length=self.last_metrics.isl + self.last_metrics.osl / 2, # type: ignore ) - self.d_correction_factor = self.last_metrics.itl / expect_itl + if expect_itl > 0: + self.d_correction_factor = self.last_metrics.itl / expect_itl + else: + logger.warning(f"Expected ITL is {expect_itl}, using default correction factor 1.0") + self.d_correction_factor = 1.0 logger.info( f"Correction factors: TTFT: {self.p_correction_factor:.3f}, ITL: {self.d_correction_factor:.3f}" ) From f7154d58996b8b1a325bbe8f1cc6dc097c7bf29b Mon Sep 17 00:00:00 2001 From: "Gavin.Zhu" Date: Thu, 18 Sep 2025 10:32:13 +0000 Subject: [PATCH 2/2] fix: fixed format for precommit Signed-off-by: Gavin.Zhu --- .../src/dynamo/planner/utils/planner_core.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/components/planner/src/dynamo/planner/utils/planner_core.py b/components/planner/src/dynamo/planner/utils/planner_core.py index d6690f3b5c..a5c539e0c6 100644 --- a/components/planner/src/dynamo/planner/utils/planner_core.py +++ b/components/planner/src/dynamo/planner/utils/planner_core.py @@ -375,16 +375,22 @@ async def make_adjustments(self): if expect_ttft > 0: self.p_correction_factor = self.last_metrics.ttft / expect_ttft else: - logger.warning(f"Expected TTFT is {expect_ttft}, using default correction factor 1.0") + logger.warning( + f"Expected TTFT is {expect_ttft}, using default correction factor 1.0" + ) self.p_correction_factor = 1.0 # for ITL, we expect the correction factor to be close to 1 if len(self.d_endpoints) > 0: - concurrency = (self.last_metrics.num_req # type: ignore + concurrency = ( + self.last_metrics.num_req # type: ignore / len(self.d_endpoints) * self.last_metrics.request_duration # type: ignore - / self.args.adjustment_interval) + / self.args.adjustment_interval + ) else: - logger.warning("No decode workers available, using default concurrency of 1.0") + logger.warning( + "No decode workers available, using default concurrency of 1.0" + ) concurrency = 1.0 expect_itl = self.decode_interpolator.interpolate_itl( @@ -394,7 +400,9 @@ async def make_adjustments(self): if expect_itl > 0: self.d_correction_factor = self.last_metrics.itl / expect_itl else: - logger.warning(f"Expected ITL is {expect_itl}, using default correction factor 1.0") + logger.warning( + f"Expected ITL is {expect_itl}, using default correction factor 1.0" + ) self.d_correction_factor = 1.0 logger.info( f"Correction factors: TTFT: {self.p_correction_factor:.3f}, ITL: {self.d_correction_factor:.3f}"