From 0f53d439065e715a5b4effbf8ed8ae53feae2a7e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 26 Sep 2025 00:20:08 +0000 Subject: [PATCH 1/9] Initial plan From 6a85e232b52a9c527e1ad79931e53b27f9505a25 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 26 Sep 2025 00:34:47 +0000 Subject: [PATCH 2/9] Add reasoning_effort parameter support for OpenAI GPT-5 models Co-authored-by: victordibia <1547007+victordibia@users.noreply.github.com> --- .../models/openai/config/__init__.py | 2 + .../tests/models/test_openai_model_client.py | 107 ++++++++++++++++++ 2 files changed, 109 insertions(+) diff --git a/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py b/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py index 02b1e3a80ff6..709722143c60 100644 --- a/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py @@ -50,6 +50,7 @@ class CreateArguments(TypedDict, total=False): user: str stream_options: Optional[StreamOptions] parallel_tool_calls: Optional[bool] + reasoning_effort: Optional[Literal["low", "medium", "high"]] AsyncAzureADTokenProvider = Callable[[], Union[str, Awaitable[str]]] @@ -99,6 +100,7 @@ class CreateArgumentsConfigModel(BaseModel): user: str | None = None stream_options: StreamOptions | None = None parallel_tool_calls: bool | None = None + reasoning_effort: Literal["low", "medium", "high"] | None = None class BaseOpenAIClientConfigurationConfigModel(CreateArgumentsConfigModel): diff --git a/python/packages/autogen-ext/tests/models/test_openai_model_client.py b/python/packages/autogen-ext/tests/models/test_openai_model_client.py index 2c1e19521ac4..7bf9b211d859 100644 --- a/python/packages/autogen-ext/tests/models/test_openai_model_client.py +++ b/python/packages/autogen-ext/tests/models/test_openai_model_client.py @@ -3269,3 +3269,110 @@ def _different_function(text: str) -> str: # TODO: add integration tests for Azure OpenAI using AAD token. + + +@pytest.mark.asyncio +async def test_reasoning_effort_parameter() -> None: + """Test that reasoning_effort parameter is properly handled in client configuration.""" + + # Test OpenAI client with reasoning_effort + openai_client = OpenAIChatCompletionClient( + model="gpt-5", + api_key="fake_key", + reasoning_effort="low", + ) + assert openai_client._create_args["reasoning_effort"] == "low" + + # Test Azure OpenAI client with reasoning_effort + azure_client = AzureOpenAIChatCompletionClient( + model="gpt-5", + azure_endpoint="fake_endpoint", + azure_deployment="gpt-5-2025-08-07", + api_version="2025-02-01-preview", + api_key="fake_key", + reasoning_effort="medium", + ) + assert azure_client._create_args["reasoning_effort"] == "medium" + + # Test load_component with reasoning_effort for OpenAI + from autogen_core.models import ChatCompletionClient + + openai_config = { + "provider": "OpenAIChatCompletionClient", + "config": { + "model": "gpt-5", + "api_key": "fake_key", + "reasoning_effort": "high", + } + } + + loaded_openai_client = ChatCompletionClient.load_component(openai_config) + assert loaded_openai_client._create_args["reasoning_effort"] == "high" + assert loaded_openai_client._raw_config["reasoning_effort"] == "high" + + # Test load_component with reasoning_effort for Azure OpenAI + azure_config = { + "provider": "AzureOpenAIChatCompletionClient", + "config": { + "model": "gpt-5", + "azure_endpoint": "fake_endpoint", + "azure_deployment": "gpt-5-2025-08-07", + "api_version": "2025-02-01-preview", + "api_key": "fake_key", + "reasoning_effort": "low", + } + } + + loaded_azure_client = ChatCompletionClient.load_component(azure_config) + assert loaded_azure_client._create_args["reasoning_effort"] == "low" + assert loaded_azure_client._raw_config["reasoning_effort"] == "low" + + # Test serialization and deserialization + config_dict = openai_client.dump_component() + reloaded_client = OpenAIChatCompletionClient.load_component(config_dict) + assert reloaded_client._create_args["reasoning_effort"] == "low" + + +@pytest.mark.asyncio +async def test_reasoning_effort_validation() -> None: + """Test reasoning_effort parameter validation.""" + + # Test valid values + for valid_value in ["low", "medium", "high"]: + client = OpenAIChatCompletionClient( + model="gpt-5", + api_key="fake_key", + reasoning_effort=valid_value, + ) + assert client._create_args["reasoning_effort"] == valid_value + + # Test None value (should be included if explicitly set) + client_with_none = OpenAIChatCompletionClient( + model="gpt-5", + api_key="fake_key", + reasoning_effort=None, + ) + # When explicitly set to None, it will be included in create_args + assert client_with_none._create_args["reasoning_effort"] is None + + # Test not providing reasoning_effort (should not be in create_args) + client_without_reasoning = OpenAIChatCompletionClient( + model="gpt-5", + api_key="fake_key", + ) + assert "reasoning_effort" not in client_without_reasoning._create_args + + # Test invalid value via load_component (Pydantic validation) + with pytest.raises(Exception): # Should raise ValidationError + from autogen_core.models import ChatCompletionClient + + config = { + "provider": "OpenAIChatCompletionClient", + "config": { + "model": "gpt-5", + "api_key": "fake_key", + "reasoning_effort": "invalid_value", + } + } + + ChatCompletionClient.load_component(config) From 1921f6253f1e64c324be890d96fc9aa9a21d1d8b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 26 Sep 2025 00:38:40 +0000 Subject: [PATCH 3/9] Fix linting error in reasoning_effort test Co-authored-by: victordibia <1547007+victordibia@users.noreply.github.com> --- .../tests/models/test_openai_model_client.py | 46 ++++++++++--------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/python/packages/autogen-ext/tests/models/test_openai_model_client.py b/python/packages/autogen-ext/tests/models/test_openai_model_client.py index 7bf9b211d859..27cb81c85dcd 100644 --- a/python/packages/autogen-ext/tests/models/test_openai_model_client.py +++ b/python/packages/autogen-ext/tests/models/test_openai_model_client.py @@ -3274,7 +3274,7 @@ def _different_function(text: str) -> str: @pytest.mark.asyncio async def test_reasoning_effort_parameter() -> None: """Test that reasoning_effort parameter is properly handled in client configuration.""" - + # Test OpenAI client with reasoning_effort openai_client = OpenAIChatCompletionClient( model="gpt-5", @@ -3282,7 +3282,7 @@ async def test_reasoning_effort_parameter() -> None: reasoning_effort="low", ) assert openai_client._create_args["reasoning_effort"] == "low" - + # Test Azure OpenAI client with reasoning_effort azure_client = AzureOpenAIChatCompletionClient( model="gpt-5", @@ -3293,86 +3293,88 @@ async def test_reasoning_effort_parameter() -> None: reasoning_effort="medium", ) assert azure_client._create_args["reasoning_effort"] == "medium" - + # Test load_component with reasoning_effort for OpenAI from autogen_core.models import ChatCompletionClient - + openai_config = { "provider": "OpenAIChatCompletionClient", "config": { "model": "gpt-5", "api_key": "fake_key", "reasoning_effort": "high", - } + }, } - + loaded_openai_client = ChatCompletionClient.load_component(openai_config) assert loaded_openai_client._create_args["reasoning_effort"] == "high" assert loaded_openai_client._raw_config["reasoning_effort"] == "high" - + # Test load_component with reasoning_effort for Azure OpenAI azure_config = { "provider": "AzureOpenAIChatCompletionClient", "config": { "model": "gpt-5", - "azure_endpoint": "fake_endpoint", + "azure_endpoint": "fake_endpoint", "azure_deployment": "gpt-5-2025-08-07", "api_version": "2025-02-01-preview", "api_key": "fake_key", "reasoning_effort": "low", - } + }, } - + loaded_azure_client = ChatCompletionClient.load_component(azure_config) assert loaded_azure_client._create_args["reasoning_effort"] == "low" assert loaded_azure_client._raw_config["reasoning_effort"] == "low" - + # Test serialization and deserialization config_dict = openai_client.dump_component() reloaded_client = OpenAIChatCompletionClient.load_component(config_dict) assert reloaded_client._create_args["reasoning_effort"] == "low" -@pytest.mark.asyncio +@pytest.mark.asyncio async def test_reasoning_effort_validation() -> None: """Test reasoning_effort parameter validation.""" - + # Test valid values for valid_value in ["low", "medium", "high"]: client = OpenAIChatCompletionClient( model="gpt-5", - api_key="fake_key", + api_key="fake_key", reasoning_effort=valid_value, ) assert client._create_args["reasoning_effort"] == valid_value - + # Test None value (should be included if explicitly set) client_with_none = OpenAIChatCompletionClient( model="gpt-5", - api_key="fake_key", + api_key="fake_key", reasoning_effort=None, ) # When explicitly set to None, it will be included in create_args assert client_with_none._create_args["reasoning_effort"] is None - + # Test not providing reasoning_effort (should not be in create_args) client_without_reasoning = OpenAIChatCompletionClient( model="gpt-5", api_key="fake_key", ) assert "reasoning_effort" not in client_without_reasoning._create_args - + # Test invalid value via load_component (Pydantic validation) - with pytest.raises(Exception): # Should raise ValidationError + from pydantic import ValidationError + + with pytest.raises(ValidationError): # Should raise ValidationError from autogen_core.models import ChatCompletionClient - + config = { "provider": "OpenAIChatCompletionClient", "config": { "model": "gpt-5", "api_key": "fake_key", "reasoning_effort": "invalid_value", - } + }, } - + ChatCompletionClient.load_component(config) From 054c4b56a2a3cbea3e4f8ab144d27e68403c7126 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 30 Sep 2025 04:43:19 +0000 Subject: [PATCH 4/9] Add documentation for reasoning_effort parameter Co-authored-by: ekzhu <320302+ekzhu@users.noreply.github.com> --- .../src/autogen_ext/models/openai/config/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py b/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py index 709722143c60..d63c95a26fd7 100644 --- a/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py @@ -51,6 +51,11 @@ class CreateArguments(TypedDict, total=False): stream_options: Optional[StreamOptions] parallel_tool_calls: Optional[bool] reasoning_effort: Optional[Literal["low", "medium", "high"]] + """Controls the amount of effort the model uses for reasoning. + Only applicable to reasoning models like o1 and o3-mini. + - 'low': Faster responses with less reasoning + - 'medium': Balanced reasoning and speed + - 'high': More thorough reasoning, may take longer""" AsyncAzureADTokenProvider = Callable[[], Union[str, Awaitable[str]]] @@ -100,6 +105,7 @@ class CreateArgumentsConfigModel(BaseModel): user: str | None = None stream_options: StreamOptions | None = None parallel_tool_calls: bool | None = None + # Controls the amount of effort the model uses for reasoning (reasoning models only) reasoning_effort: Literal["low", "medium", "high"] | None = None From b6bf024cde9adb0a6186f25ad17cd4243b5eede0 Mon Sep 17 00:00:00 2001 From: Eric Zhu Date: Mon, 29 Sep 2025 21:58:13 -0700 Subject: [PATCH 5/9] Apply suggestion from @BaillyM Co-authored-by: Maurice Bailly --- .../src/autogen_ext/models/openai/config/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py b/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py index d63c95a26fd7..97e5bbe014cc 100644 --- a/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py @@ -50,7 +50,7 @@ class CreateArguments(TypedDict, total=False): user: str stream_options: Optional[StreamOptions] parallel_tool_calls: Optional[bool] - reasoning_effort: Optional[Literal["low", "medium", "high"]] + reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] """Controls the amount of effort the model uses for reasoning. Only applicable to reasoning models like o1 and o3-mini. - 'low': Faster responses with less reasoning From 27fdc6aa561c402c24ece053458048e99c53f5c7 Mon Sep 17 00:00:00 2001 From: Eric Zhu Date: Mon, 29 Sep 2025 21:58:23 -0700 Subject: [PATCH 6/9] Apply suggestion from @BaillyM Co-authored-by: Maurice Bailly --- .../src/autogen_ext/models/openai/config/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py b/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py index 97e5bbe014cc..1bef62d35804 100644 --- a/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py @@ -106,7 +106,7 @@ class CreateArgumentsConfigModel(BaseModel): stream_options: StreamOptions | None = None parallel_tool_calls: bool | None = None # Controls the amount of effort the model uses for reasoning (reasoning models only) - reasoning_effort: Literal["low", "medium", "high"] | None = None + reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = None class BaseOpenAIClientConfigurationConfigModel(CreateArgumentsConfigModel): From 7658f86b4f5f8a48e3a4ce438f12dced5f8b3edf Mon Sep 17 00:00:00 2001 From: Eric Zhu Date: Mon, 29 Sep 2025 21:59:35 -0700 Subject: [PATCH 7/9] Update python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py --- .../autogen-ext/src/autogen_ext/models/openai/config/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py b/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py index 1bef62d35804..d0a17875875e 100644 --- a/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/models/openai/config/__init__.py @@ -53,6 +53,7 @@ class CreateArguments(TypedDict, total=False): reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] """Controls the amount of effort the model uses for reasoning. Only applicable to reasoning models like o1 and o3-mini. + - 'minimal': Fastest response with minimal reasoning - 'low': Faster responses with less reasoning - 'medium': Balanced reasoning and speed - 'high': More thorough reasoning, may take longer""" From 64cf7696f893894e14af02f4c4e6f5626a84d381 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 30 Sep 2025 05:10:01 +0000 Subject: [PATCH 8/9] Fix pyright errors in reasoning_effort tests by adding ignore comments for private usage Co-authored-by: ekzhu <320302+ekzhu@users.noreply.github.com> --- .../tests/models/test_openai_model_client.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/python/packages/autogen-ext/tests/models/test_openai_model_client.py b/python/packages/autogen-ext/tests/models/test_openai_model_client.py index 27cb81c85dcd..786a56f7a603 100644 --- a/python/packages/autogen-ext/tests/models/test_openai_model_client.py +++ b/python/packages/autogen-ext/tests/models/test_openai_model_client.py @@ -3281,7 +3281,7 @@ async def test_reasoning_effort_parameter() -> None: api_key="fake_key", reasoning_effort="low", ) - assert openai_client._create_args["reasoning_effort"] == "low" + assert openai_client._create_args["reasoning_effort"] == "low" # pyright: ignore[reportPrivateUsage] # Test Azure OpenAI client with reasoning_effort azure_client = AzureOpenAIChatCompletionClient( @@ -3292,7 +3292,7 @@ async def test_reasoning_effort_parameter() -> None: api_key="fake_key", reasoning_effort="medium", ) - assert azure_client._create_args["reasoning_effort"] == "medium" + assert azure_client._create_args["reasoning_effort"] == "medium" # pyright: ignore[reportPrivateUsage] # Test load_component with reasoning_effort for OpenAI from autogen_core.models import ChatCompletionClient @@ -3307,8 +3307,8 @@ async def test_reasoning_effort_parameter() -> None: } loaded_openai_client = ChatCompletionClient.load_component(openai_config) - assert loaded_openai_client._create_args["reasoning_effort"] == "high" - assert loaded_openai_client._raw_config["reasoning_effort"] == "high" + assert loaded_openai_client._create_args["reasoning_effort"] == "high" # pyright: ignore[reportPrivateUsage, reportUnknownMemberType, reportAttributeAccessIssue] + assert loaded_openai_client._raw_config["reasoning_effort"] == "high" # pyright: ignore[reportPrivateUsage, reportUnknownMemberType, reportAttributeAccessIssue] # Test load_component with reasoning_effort for Azure OpenAI azure_config = { @@ -3324,13 +3324,13 @@ async def test_reasoning_effort_parameter() -> None: } loaded_azure_client = ChatCompletionClient.load_component(azure_config) - assert loaded_azure_client._create_args["reasoning_effort"] == "low" - assert loaded_azure_client._raw_config["reasoning_effort"] == "low" + assert loaded_azure_client._create_args["reasoning_effort"] == "low" # pyright: ignore[reportPrivateUsage, reportUnknownMemberType, reportAttributeAccessIssue] + assert loaded_azure_client._raw_config["reasoning_effort"] == "low" # pyright: ignore[reportPrivateUsage, reportUnknownMemberType, reportAttributeAccessIssue] # Test serialization and deserialization config_dict = openai_client.dump_component() reloaded_client = OpenAIChatCompletionClient.load_component(config_dict) - assert reloaded_client._create_args["reasoning_effort"] == "low" + assert reloaded_client._create_args["reasoning_effort"] == "low" # pyright: ignore[reportPrivateUsage] @pytest.mark.asyncio @@ -3342,9 +3342,9 @@ async def test_reasoning_effort_validation() -> None: client = OpenAIChatCompletionClient( model="gpt-5", api_key="fake_key", - reasoning_effort=valid_value, + reasoning_effort=valid_value, # pyright: ignore[reportArgumentType] ) - assert client._create_args["reasoning_effort"] == valid_value + assert client._create_args["reasoning_effort"] == valid_value # pyright: ignore[reportPrivateUsage] # Test None value (should be included if explicitly set) client_with_none = OpenAIChatCompletionClient( @@ -3353,14 +3353,14 @@ async def test_reasoning_effort_validation() -> None: reasoning_effort=None, ) # When explicitly set to None, it will be included in create_args - assert client_with_none._create_args["reasoning_effort"] is None + assert client_with_none._create_args["reasoning_effort"] is None # pyright: ignore[reportPrivateUsage] # Test not providing reasoning_effort (should not be in create_args) client_without_reasoning = OpenAIChatCompletionClient( model="gpt-5", api_key="fake_key", ) - assert "reasoning_effort" not in client_without_reasoning._create_args + assert "reasoning_effort" not in client_without_reasoning._create_args # pyright: ignore[reportPrivateUsage] # Test invalid value via load_component (Pydantic validation) from pydantic import ValidationError From 5652767e1eb27ada7b9e7fbe15a3b1ff6b02f504 Mon Sep 17 00:00:00 2001 From: Eric Zhu Date: Mon, 29 Sep 2025 22:23:18 -0700 Subject: [PATCH 9/9] fix mypy --- .../tests/models/test_openai_model_client.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/packages/autogen-ext/tests/models/test_openai_model_client.py b/python/packages/autogen-ext/tests/models/test_openai_model_client.py index 786a56f7a603..ba79795d1ed7 100644 --- a/python/packages/autogen-ext/tests/models/test_openai_model_client.py +++ b/python/packages/autogen-ext/tests/models/test_openai_model_client.py @@ -3307,8 +3307,8 @@ async def test_reasoning_effort_parameter() -> None: } loaded_openai_client = ChatCompletionClient.load_component(openai_config) - assert loaded_openai_client._create_args["reasoning_effort"] == "high" # pyright: ignore[reportPrivateUsage, reportUnknownMemberType, reportAttributeAccessIssue] - assert loaded_openai_client._raw_config["reasoning_effort"] == "high" # pyright: ignore[reportPrivateUsage, reportUnknownMemberType, reportAttributeAccessIssue] + assert loaded_openai_client._create_args["reasoning_effort"] == "high" # type: ignore[attr-defined] # pyright: ignore[reportPrivateUsage, reportUnknownMemberType, reportAttributeAccessIssue] + assert loaded_openai_client._raw_config["reasoning_effort"] == "high" # type: ignore[attr-defined] # pyright: ignore[reportPrivateUsage, reportUnknownMemberType, reportAttributeAccessIssue] # Test load_component with reasoning_effort for Azure OpenAI azure_config = { @@ -3324,8 +3324,8 @@ async def test_reasoning_effort_parameter() -> None: } loaded_azure_client = ChatCompletionClient.load_component(azure_config) - assert loaded_azure_client._create_args["reasoning_effort"] == "low" # pyright: ignore[reportPrivateUsage, reportUnknownMemberType, reportAttributeAccessIssue] - assert loaded_azure_client._raw_config["reasoning_effort"] == "low" # pyright: ignore[reportPrivateUsage, reportUnknownMemberType, reportAttributeAccessIssue] + assert loaded_azure_client._create_args["reasoning_effort"] == "low" # type: ignore[attr-defined] # pyright: ignore[reportPrivateUsage, reportUnknownMemberType, reportAttributeAccessIssue] + assert loaded_azure_client._raw_config["reasoning_effort"] == "low" # type: ignore[attr-defined] # pyright: ignore[reportPrivateUsage, reportUnknownMemberType, reportAttributeAccessIssue] # Test serialization and deserialization config_dict = openai_client.dump_component() @@ -3342,7 +3342,7 @@ async def test_reasoning_effort_validation() -> None: client = OpenAIChatCompletionClient( model="gpt-5", api_key="fake_key", - reasoning_effort=valid_value, # pyright: ignore[reportArgumentType] + reasoning_effort=valid_value, # type: ignore[arg-type] # pyright: ignore[reportArgumentType] ) assert client._create_args["reasoning_effort"] == valid_value # pyright: ignore[reportPrivateUsage]