From 4f89333040416d694baa1a8fb9c53d5751a5264b Mon Sep 17 00:00:00 2001
From: Hussainbeam <88007126+SYED-M-HUSSAIN@users.noreply.github.com>
Date: Mon, 26 May 2025 14:11:14 +0500
Subject: [PATCH 1/2] fix: respect retry_delay from Gemini 429
 ResourceExhausted error in retry logic

---
 .../langchain_google_genai/chat_models.py     | 33 +++++++++++++++++--
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/libs/genai/langchain_google_genai/chat_models.py b/libs/genai/langchain_google_genai/chat_models.py
index 485abe45b..043658178 100644
--- a/libs/genai/langchain_google_genai/chat_models.py
+++ b/libs/genai/langchain_google_genai/chat_models.py
@@ -95,6 +95,7 @@
     stop_after_attempt,
     wait_exponential,
 )
+from tenacity.wait import wait_base
 from typing_extensions import Self, is_typeddict
 
 from langchain_google_genai._common import (
@@ -139,13 +140,33 @@ class ChatGoogleGenerativeAIError(GoogleGenerativeAIError):
     """
 
 
+class wait_with_server_retry_delay(wait_base):
+    def __init__(self, fallback_wait):
+        self.fallback_wait = fallback_wait
+
+    def __call__(self, retry_state):
+        exception = retry_state.outcome.exception()
+        # Check if it's a ResourceExhausted with retry_delay
+        if (
+            isinstance(exception, google.api_core.exceptions.ResourceExhausted)
+            and hasattr(exception, "retry_delay")
+            and exception.retry_delay is not None
+            and hasattr(exception.retry_delay, "seconds")
+        ):
+            delay = exception.retry_delay.seconds
+            logger.warning(f"Respecting server-suggested retry_delay: {delay}s")
+            return delay
+        # Otherwise use fallback (exponential backoff)
+        return self.fallback_wait(retry_state)
+
+
 def _create_retry_decorator() -> Callable[[Any], Any]:
     """
     Creates and returns a preconfigured tenacity retry decorator.
 
     The retry decorator is configured to handle specific Google API exceptions
-    such as ResourceExhausted and ServiceUnavailable. It uses an exponential
-    backoff strategy for retries.
+    such as ResourceExhausted and ServiceUnavailable. It honors a server-provided
+    retry_delay when present and otherwise falls back to exponential backoff.
 
     Returns:
         Callable[[Any], Any]: A retry decorator configured for handling specific
@@ -156,10 +177,16 @@ def _create_retry_decorator() -> Callable[[Any], Any]:
     max_seconds = 60
     max_retries = 2
 
+    fallback_wait = wait_exponential(
+        multiplier=multiplier,
+        min=min_seconds,
+        max=max_seconds
+    )
+
     return retry(
         reraise=True,
         stop=stop_after_attempt(max_retries),
-        wait=wait_exponential(multiplier=multiplier, min=min_seconds, max=max_seconds),
+        wait=wait_with_server_retry_delay(fallback_wait),
         retry=(
             retry_if_exception_type(google.api_core.exceptions.ResourceExhausted)
             | retry_if_exception_type(google.api_core.exceptions.ServiceUnavailable)

From 6616247a5b130a2a75ec7a9db8e11ed4c9e88aa1 Mon Sep 17 00:00:00 2001
From: Hussainbeam <88007126+SYED-M-HUSSAIN@users.noreply.github.com>
Date: Thu, 29 May 2025 10:40:07 +0500
Subject: [PATCH 2/2] format the code with ruff

---
 libs/genai/langchain_google_genai/chat_models.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/libs/genai/langchain_google_genai/chat_models.py b/libs/genai/langchain_google_genai/chat_models.py
index 043658178..a1148283d 100644
--- a/libs/genai/langchain_google_genai/chat_models.py
+++ b/libs/genai/langchain_google_genai/chat_models.py
@@ -178,9 +178,7 @@ def _create_retry_decorator() -> Callable[[Any], Any]:
     max_retries = 2
 
     fallback_wait = wait_exponential(
-        multiplier=multiplier,
-        min=min_seconds,
-        max=max_seconds
+        multiplier=multiplier, min=min_seconds, max=max_seconds
     )
 
     return retry(