From 8c9a182505315854301394a88535099f652f904e Mon Sep 17 00:00:00 2001 From: SyedaAnshrahGillani Date: Fri, 25 Jul 2025 15:31:54 +0500 Subject: [PATCH] feat: Make webpage summarization timeout configurable This commit introduces a configurable timeout for the webpage summarization process. Previously, the `summarize_webpage` function in `utils.py` had a hardcoded 60-second timeout. This change allows users to specify a custom timeout via the `summarization_timeout` field in the `Configuration` class, providing more flexibility for varying content lengths and network conditions. - Added `summarization_timeout` field to `Configuration` in `configuration.py`. - Modified `summarize_webpage` in `utils.py` to accept and use the new timeout. - Updated `tavily_search` to pass the configurable timeout to `summarize_webpage`. --- src/open_deep_research/configuration.py | 10 ++++++++++ src/open_deep_research/utils.py | 5 +++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/open_deep_research/configuration.py b/src/open_deep_research/configuration.py index b78b991d9..86823fcc2 100644 --- a/src/open_deep_research/configuration.py +++ b/src/open_deep_research/configuration.py @@ -128,6 +128,16 @@ class Configuration(BaseModel): } } ) + summarization_timeout: float = Field( + default=60.0, + metadata={ + "x_oap_ui_config": { + "type": "number", + "default": 60.0, + "description": "Timeout in seconds for webpage summarization" + } + } + ) research_model: str = Field( default="openai:gpt-4.1", metadata={ diff --git a/src/open_deep_research/utils.py b/src/open_deep_research/utils.py index d79edd2b4..c1295c4cb 100644 --- a/src/open_deep_research/utils.py +++ b/src/open_deep_research/utils.py @@ -74,6 +74,7 @@ async def noop(): noop() if not result.get("raw_content") else summarize_webpage( summarization_model, result['raw_content'][:max_char_to_include], + timeout=configurable.summarization_timeout ) for result in unique_results.values() ] @@ -108,11 +109,11 @@ async def tavily_search_async(search_queries, max_results: int = 5, topic: Liter search_docs = await asyncio.gather(*search_tasks) return search_docs -async def summarize_webpage(model: BaseChatModel, webpage_content: str) -> str: +async def summarize_webpage(model: BaseChatModel, webpage_content: str, timeout: float = 60.0) -> str: try: summary = await asyncio.wait_for( model.ainvoke([HumanMessage(content=summarize_webpage_prompt.format(webpage_content=webpage_content, date=get_today_str()))]), - timeout=60.0 + timeout=timeout ) return f"""\n{summary.summary}\n\n\n\n{summary.key_excerpts}\n""" except (asyncio.TimeoutError, Exception) as e: