diff --git a/docs/source/how_to/how_to_benchmarking.ipynb b/docs/source/how_to/how_to_benchmarking.ipynb
index b6b19bf5c..7838f7c9f 100644
--- a/docs/source/how_to/how_to_benchmarking.ipynb
+++ b/docs/source/how_to/how_to_benchmarking.ipynb
@@ -135,6 +135,18 @@
"cell_type": "markdown",
"id": "10",
"metadata": {},
+ "source": [
+ ":::{note}\n",
+ "\n",
+ "For details on using other plotting backends, see [How to change the plotting backend](how_to_change_plotting_backend.ipynb).\n",
+ "\n",
+ ":::"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "11",
+ "metadata": {},
"source": [
"The x axis shows runtime per problem. The y axis shows the share of problems each algorithm solved within that runtime. Thus, higher and further to the left values are desirable. Higher means more problems were solved and further to the left means, the algorithm found the solutions earlier. \n",
"\n",
@@ -150,7 +162,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "11",
+ "id": "12",
"metadata": {},
"outputs": [],
"source": [
@@ -167,7 +179,7 @@
},
{
"cell_type": "markdown",
- "id": "12",
+ "id": "13",
"metadata": {},
"source": [
"## 4b. Convergence plots\n",
@@ -178,7 +190,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "13",
+ "id": "14",
"metadata": {},
"outputs": [],
"source": [
@@ -194,7 +206,7 @@
},
{
"cell_type": "markdown",
- "id": "14",
+ "id": "15",
"metadata": {},
"source": [
"The further to the left and the lower the curve of an algorithm, the better that algorithm performed.\n",
@@ -205,7 +217,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "15",
+ "id": "16",
"metadata": {},
"outputs": [],
"source": [
@@ -223,7 +235,7 @@
},
{
"cell_type": "markdown",
- "id": "16",
+ "id": "17",
"metadata": {},
"source": [
"## 5a. Convergence report\n",
@@ -235,7 +247,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "17",
+ "id": "18",
"metadata": {},
"outputs": [],
"source": [
@@ -251,7 +263,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "18",
+ "id": "19",
"metadata": {},
"outputs": [],
"source": [
@@ -260,10 +272,10 @@
},
{
"cell_type": "markdown",
- "id": "19",
+ "id": "20",
"metadata": {},
"source": [
- "## 5b. Rank report¶\n",
+ "## 5b. Rank report\n",
"\n",
"The **Rank Report** shows the ranks of the algorithms for each problem; where 0 means the algorithm was the fastest on a given benchmark problem, 1 means it was the second fastest and so on. If an algorithm did not converge on a problem, the value is \"failed\". If an algorithm did encounter an error during optimization, the value is \"error\"."
]
@@ -271,7 +283,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "20",
+ "id": "21",
"metadata": {},
"outputs": [],
"source": [
@@ -288,7 +300,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "21",
+ "id": "22",
"metadata": {},
"outputs": [],
"source": [
@@ -297,10 +309,10 @@
},
{
"cell_type": "markdown",
- "id": "22",
+ "id": "23",
"metadata": {},
"source": [
- "## 5b. Traceback report¶\n",
+ "## 5b. Traceback report\n",
"\n",
"The **Traceback Report** shows the tracebacks returned by the optimizers if they encountered an error during optimization. The resulting ```pd.DataFrame``` is empty if none of the optimizers terminated with an error, as in the example below."
]
@@ -308,7 +320,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "23",
+ "id": "24",
"metadata": {},
"outputs": [],
"source": [
@@ -318,7 +330,7 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "24",
+ "id": "25",
"metadata": {},
"outputs": [],
"source": [
diff --git a/src/optimagic/visualization/backends.py b/src/optimagic/visualization/backends.py
index 402e1768e..0b10c1710 100644
--- a/src/optimagic/visualization/backends.py
+++ b/src/optimagic/visualization/backends.py
@@ -23,6 +23,8 @@ def __call__(
height: int | None,
width: int | None,
legend_properties: dict[str, Any] | None,
+ margin_properties: dict[str, Any] | None,
+ horizontal_line: float | None,
) -> Any: ...
@@ -36,12 +38,32 @@ def _line_plot_plotly(
height: int | None,
width: int | None,
legend_properties: dict[str, Any] | None,
+ margin_properties: dict[str, Any] | None,
+ horizontal_line: float | None,
) -> go.Figure:
if template is None:
template = "simple_white"
fig = go.Figure()
+ fig.update_layout(
+ title=title,
+        xaxis_title=xlabel.format(linebreak="<br>") if xlabel else None,
+ yaxis_title=ylabel,
+ template=template,
+ height=height,
+ width=width,
+ legend=legend_properties,
+ margin=margin_properties,
+ )
+
+ if horizontal_line is not None:
+ fig.add_hline(
+ y=horizontal_line,
+ line_width=fig.layout.yaxis.linewidth or 1,
+ opacity=1.0,
+ )
+
for line in lines:
trace = go.Scatter(
x=line.x,
@@ -53,18 +75,6 @@ def _line_plot_plotly(
)
fig.add_trace(trace)
- fig.update_layout(
- title=title,
- xaxis_title=xlabel,
- yaxis_title=ylabel,
- template=template,
- height=height,
- width=width,
- )
-
- if legend_properties:
- fig.update_layout(legend=legend_properties)
-
return fig
@@ -78,6 +88,8 @@ def _line_plot_matplotlib(
height: int | None,
width: int | None,
legend_properties: dict[str, Any] | None,
+ margin_properties: dict[str, Any] | None,
+ horizontal_line: float | None,
) -> "plt.Axes":
import matplotlib.pyplot as plt
@@ -93,7 +105,17 @@ def _line_plot_matplotlib(
template = "default"
with plt.style.context(template):
- fig, ax = plt.subplots(figsize=(width, height) if width and height else None)
+ px = 1 / plt.rcParams["figure.dpi"] # pixel in inches
+ fig, ax = plt.subplots(
+ figsize=(width * px, height * px) if width and height else None
+ )
+
+ if horizontal_line is not None:
+ ax.axhline(
+ y=horizontal_line,
+ color=ax.spines["left"].get_edgecolor() or "gray",
+ linewidth=ax.spines["left"].get_linewidth() or 1.0,
+ )
for line in lines:
ax.plot(
@@ -103,9 +125,17 @@ def _line_plot_matplotlib(
color=line.color,
)
- ax.set(title=title, xlabel=xlabel, ylabel=ylabel)
- if legend_properties:
- ax.legend(**legend_properties)
+ ax.set(
+ title=title,
+ xlabel=xlabel.format(linebreak="\n") if xlabel else None,
+ ylabel=ylabel,
+ )
+
+ if legend_properties is None:
+ legend_properties = {}
+ ax.legend(**legend_properties)
+
+ fig.tight_layout()
return ax
@@ -129,6 +159,8 @@ def line_plot(
height: int | None = None,
width: int | None = None,
legend_properties: dict[str, Any] | None = None,
+ margin_properties: dict[str, Any] | None = None,
+ horizontal_line: float | None = None,
) -> Any:
"""Create a line plot corresponding to the specified backend.
@@ -144,6 +176,9 @@ def line_plot(
height: Height of the plot (in pixels).
width: Width of the plot (in pixels).
legend_properties: Backend-specific properties for the legend.
+ margin_properties: Backend-specific properties for the plot margins.
+ horizontal_line: If provided, a horizontal line is drawn at the specified
+ y-value.
Returns:
A figure object corresponding to the specified backend.
@@ -178,6 +213,8 @@ def line_plot(
height=height,
width=width,
legend_properties=legend_properties,
+ margin_properties=margin_properties,
+ horizontal_line=horizontal_line,
)
return fig
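A minimal usage sketch of the extended `line_plot` signature (illustrative, not part of the patch). The `LineData` container and its fields are taken from the `profile_plot` changes below; the concrete values, the `"algo1"` name, and the hex color are assumptions.

```python
import numpy as np

from optimagic.visualization.backends import line_plot
from optimagic.visualization.plotting_utilities import LineData

# One illustrative line; profile_plot builds these from benchmark results.
lines = [
    LineData(
        x=np.array([1.0, 2.0, 3.0]),
        y=np.array([0.3, 0.6, 1.0]),
        name="algo1",
        color="#1f77b4",
    )
]

fig = line_plot(
    lines,
    backend="plotly",
    # The '{linebreak}' placeholder is resolved per backend ("<br>" vs. "\n").
    xlabel="Multiple of Minimal Runtime{linebreak}Needed to Solve the Problem",
    ylabel="Share of Problems Solved",
    height=300,
    width=500,
    legend_properties={"title": {"text": "algorithm"}},
    margin_properties={"l": 10, "r": 10, "t": 30, "b": 30},
    horizontal_line=1.0,  # horizontal reference line at y = 1
)
```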
diff --git a/src/optimagic/visualization/profile_plot.py b/src/optimagic/visualization/profile_plot.py
index d046b957e..fa9629696 100644
--- a/src/optimagic/visualization/profile_plot.py
+++ b/src/optimagic/visualization/profile_plot.py
@@ -1,24 +1,48 @@
+import itertools
+from typing import Any, Literal
+
import numpy as np
import pandas as pd
-import plotly.express as px
+from numpy.typing import NDArray
from optimagic.benchmarking.process_benchmark_results import (
process_benchmark_results,
)
-from optimagic.config import PLOTLY_TEMPLATE
+from optimagic.config import DEFAULT_PALETTE
+from optimagic.visualization.backends import line_plot
+from optimagic.visualization.plotting_utilities import LineData, get_palette_cycle
+
+BACKEND_TO_PROFILE_PLOT_LEGEND_PROPERTIES: dict[str, dict[str, Any]] = {
+ "plotly": {"title": {"text": "algorithm"}},
+ "matplotlib": {
+ "bbox_to_anchor": (1.02, 1),
+ "loc": "upper left",
+ "fontsize": "x-small",
+ "title": "algorithm",
+ },
+}
+
+BACKEND_TO_PROFILE_PLOT_MARGIN_PROPERTIES: dict[str, dict[str, Any]] = {
+ "plotly": {"l": 10, "r": 10, "t": 30, "b": 30},
+ # "matplotlib": handles margins automatically via tight_layout()
+}
def profile_plot(
- problems,
- results,
+ problems: dict[str, dict[str, Any]],
+ results: dict[tuple[str, str], dict[str, Any]],
*,
- runtime_measure="n_evaluations",
- normalize_runtime=False,
- stopping_criterion="y",
- x_precision=1e-4,
- y_precision=1e-4,
- template=PLOTLY_TEMPLATE,
-):
+ runtime_measure: Literal[
+ "walltime", "n_evaluations", "n_batches"
+ ] = "n_evaluations",
+ normalize_runtime: bool = False,
+ stopping_criterion: Literal["x", "y", "x_and_y", "x_or_y"] = "y",
+ x_precision: float = 1e-4,
+ y_precision: float = 1e-4,
+ backend: Literal["plotly", "matplotlib"] = "plotly",
+ template: str | None = None,
+ palette: list[str] | str = DEFAULT_PALETTE,
+) -> Any:
"""Compare optimizers over a problem set.
This plot answers the question: What percentage of problems can each algorithm
@@ -39,45 +63,51 @@ def profile_plot(
Moré and Wild (2009).
Args:
- problems (dict): optimagic benchmarking problems dictionary. Keys are the
- problem names. Values contain information on the problem, including the
- solution value.
- results (dict): optimagic benchmarking results dictionary. Keys are
- tuples of the form (problem, algorithm), values are dictionaries of the
- collected information on the benchmark run, including 'criterion_history'
- and 'time_history'.
- runtime_measure (str): "n_evaluations", "n_batches" or "walltime".
- This is the runtime until the desired convergence was reached by an
- algorithm. This is called performance measure by Moré and Wild (2009).
- normalize_runtime (bool): If True the runtime each algorithm needed for each
- problem is scaled by the time the fastest algorithm needed. If True, the
- resulting plot is what Moré and Wild (2009) called data profiles.
- stopping_criterion (str): one of "x_and_y", "x_or_y", "x", "y". Determines
- how convergence is determined from the two precisions.
- x_precision (float or None): how close an algorithm must have gotten to the
- true parameter values (as percent of the Euclidean distance between start
- and solution parameters) before the criterion for clipping and convergence
- is fulfilled.
- y_precision (float or None): how close an algorithm must have gotten to the
- true criterion values (as percent of the distance between start
- and solution criterion value) before the criterion for clipping and
- convergence is fulfilled.
- template (str): The template for the figure. Default is "plotly_white".
+ problems: A dictionary where keys are the problem names. Values contain
+ information on the problem, including the solution value.
+ results: A dictionary where keys are tuples of the form (problem, algorithm),
+ values are dictionaries of the collected information on the benchmark
+ run, including 'criterion_history' and 'time_history'.
+        runtime_measure: The runtime until the desired convergence was reached by an
+            algorithm. This is called the performance measure by Moré and Wild (2009).
+        normalize_runtime: If True, the runtime each algorithm needed for each problem
+            is scaled by the time the fastest algorithm needed. The resulting plot is
+            then what Moré and Wild (2009) called data profiles.
+        stopping_criterion: Specifies how convergence is determined from the two
+            precisions.
+ x_precision: How close an algorithm must have gotten to the true parameter
+ values (as percent of the Euclidean distance between start and solution
+ parameters) before the criterion for clipping and convergence is fulfilled.
+ y_precision: How close an algorithm must have gotten to the true criterion
+ values (as percent of the distance between start and solution criterion
+ value) before the criterion for clipping and convergence is fulfilled.
+ backend: The backend to use for plotting. Default is "plotly".
+ template: The template for the figure. If not specified, the default template of
+ the backend is used.
+ palette: The coloring palette for traces. Default is the D3 qualitative palette.
Returns:
- plotly.Figure
+ The figure object containing the profile plot.
"""
+ # ==================================================================================
+ # Process inputs
+
+ palette_cycle = get_palette_cycle(palette)
+
if stopping_criterion is None:
raise ValueError(
"You must specify a stopping criterion for the performance plot. "
)
if runtime_measure not in ["walltime", "n_evaluations", "n_batches"]:
raise ValueError(
- "Only 'walltime' or 'n_evaluations' are allowed as "
- f"runtime_measure. You specified {runtime_measure}."
+ "Only 'walltime', 'n_evaluations' or 'n_batches' are allowed as "
+ f"runtime_measure. You specified '{runtime_measure}'."
)
+ # ==================================================================================
+ # Extract backend-agnostic plotting data from benchmark results
+
df, converged_info = process_benchmark_results(
problems=problems,
results=results,
@@ -92,6 +122,54 @@ def profile_plot(
converged_info=converged_info,
)
+ lines = _extract_profile_plot_lines(
+ solution_times=solution_times,
+ normalize_runtime=normalize_runtime,
+ converged_info=converged_info,
+ palette_cycle=palette_cycle,
+ )
+
+ # ==================================================================================
+ # Generate the figure
+
+ fig = line_plot(
+ lines,
+ backend=backend,
+ xlabel=_get_profile_plot_xlabel(runtime_measure, normalize_runtime),
+ ylabel="Share of Problems Solved",
+ template=template,
+ height=300,
+ width=500,
+ legend_properties=BACKEND_TO_PROFILE_PLOT_LEGEND_PROPERTIES.get(backend, None),
+ margin_properties=BACKEND_TO_PROFILE_PLOT_MARGIN_PROPERTIES.get(backend, None),
+ horizontal_line=1.0,
+ )
+
+ return fig
+
+
+def _extract_profile_plot_lines(
+ solution_times: pd.DataFrame,
+ normalize_runtime: bool,
+ converged_info: pd.DataFrame,
+ palette_cycle: "itertools.cycle[str]",
+) -> list[LineData]:
+ """Extract lines for profile plot from data.
+
+ Args:
+ solution_times: A DataFrame where columns are the names of the algorithms,
+ indexes are the problems. Values are performance measures.
+        normalize_runtime: If True, the runtime each algorithm needed for each problem
+            is scaled by the time the fastest algorithm needed.
+ converged_info: A DataFrame where columns are the names of the algorithms,
+ indexes are the problems. The values are boolean and True when the algorithm
+ arrived at the solution with the desired precision.
+ palette_cycle: Cycle of colors for plotting.
+
+ Returns:
+ A list of data objects containing data for each line of the profile plot.
+
+ """
if normalize_runtime:
solution_times = solution_times.divide(solution_times.min(axis=1), axis=0)
solution_times[~converged_info] = np.inf
@@ -103,67 +181,52 @@ def profile_plot(
)
performance_profiles = for_each_alpha.groupby("alpha").mean().stack().reset_index()
- fig = px.line(performance_profiles, x="alpha", y=0, color="algorithm")
-
- xlabels = {
- (
- "n_evaluations",
- True,
- ): "Multiple of Minimal Number of Function Evaluations
"
- "Needed to Solve the Problem",
- (
- "walltime",
- True,
- ): "Multiple of Minimal Wall Time
Needed to Solve the Problem",
- (
- "n_batches",
- True,
- ): "Multiple of Minimal Number of Batches
Needed to Solve the Problem",
- ("n_evaluations", False): "Number of Function Evaluations",
- ("walltime", False): "Wall Time Needed to Solve the Problem",
- ("n_batches", False): "Number of Batches",
- }
-
- fig.update_layout(
- xaxis_title=xlabels[(runtime_measure, normalize_runtime)],
- yaxis_title="Share of Problems Solved",
- title=None,
- height=300,
- width=500,
- margin={"l": 10, "r": 10, "t": 30, "b": 10},
- template=template,
- )
+ lines: list[LineData] = []
- fig.add_hline(y=1)
- return fig
+ for algorithm, data in performance_profiles.groupby("algorithm"):
+ line_data = LineData(
+ x=data["alpha"].to_numpy(),
+ y=data[0].to_numpy(),
+ name=str(algorithm),
+ color=next(palette_cycle),
+ )
+ lines.append(line_data)
+ return lines
-def create_solution_times(df, runtime_measure, converged_info, return_tidy=True):
+
+def create_solution_times(
+ df: pd.DataFrame,
+ runtime_measure: Literal["walltime", "n_evaluations", "n_batches"],
+ converged_info: pd.DataFrame,
+ return_tidy: bool = True,
+) -> pd.DataFrame:
"""Find the solution time for each algorithm and problem.
Args:
- df (pandas.DataFrame): contains 'problem', 'algorithm' and 'runtime_measure'
+ df: A DataFrame which contains 'problem', 'algorithm' and 'runtime_measure'
as columns.
- runtime_measure (str): 'walltime', 'n_batches' or 'n_evaluations'.
- converged_info (pandas.DataFrame): columns are the algorithms, indexes are the
- problems. The values are boolean and True when the algorithm arrived at
- the solution with the desired precision.
- return_tidy (bool): If True, the resulting DataFrame will be a tidy DataFrame
+        runtime_measure: One of 'walltime', 'n_evaluations' or 'n_batches'; the runtime
+            until convergence, called the performance measure by Moré and Wild (2009).
+ converged_info: A DataFrame where columns are the names of the algorithms,
+ indexes are the problems. The values are boolean and True when the algorithm
+ arrived at the solution with the desired precision.
+ return_tidy: If True, the resulting DataFrame will be a tidy DataFrame
with problem and algorithm as indexes and runtime_measure as column.
If False, the resulting DataFrame will have problem, algorithm and
runtime_measure as columns.
Returns:
- solution_times (pandas.DataFrame): If return_tidy is True, indexes are the
- problems, columns are the algorithms. If return_tidy is False, columns are
- problem, algorithm and runtime_measure. The values are either the number
- of evaluations or the walltime each algorithm needed to achieve the
- desired precision. If the desired precision was not achieved the value is
- set to np.inf.
+ A DataFrame. If return_tidy is True, indexes are the problems, columns are the
+ algorithms. If return_tidy is False, columns are problem, algorithm and
+ runtime_measure. The values are either the number of evaluations or the
+ walltime each algorithm needed to achieve the desired precision. If the
+ desired precision was not achieved the value is set to np.inf.
"""
- solution_times = df.groupby(["problem", "algorithm"])[runtime_measure].max()
- solution_times = solution_times.unstack()
+ solution_times = (
+ df.groupby(["problem", "algorithm"])[runtime_measure].max().unstack()
+ )
# We convert the dtype to float to support the use of np.inf
solution_times = solution_times.astype(float).where(converged_info, other=np.inf)
@@ -176,7 +239,7 @@ def create_solution_times(df, runtime_measure, converged_info, return_tidy=True)
return solution_times
-def _determine_alpha_grid(solution_times):
+def _determine_alpha_grid(solution_times: pd.DataFrame) -> list[np.float64]:
switch_points = _find_switch_points(solution_times=solution_times)
point_to_right = switch_points[-1] * 1.05
@@ -186,18 +249,18 @@ def _determine_alpha_grid(solution_times):
return alphas
-def _find_switch_points(solution_times):
+def _find_switch_points(solution_times: pd.DataFrame) -> NDArray[np.float64]:
"""Determine the switch points of the performance profiles.
Args:
- solution_times (pandas.DataFrame): columns are the names of the algorithms,
- the indexes are the problems. Values are performance measures.
- They can be either float, when normalize_runtime was True or int when the
- runtime_measure are not normalized function evaluations or datetime when
- the not normalized walltime is used.
+        solution_times: A DataFrame where columns are the names of the algorithms and
+            indexes are the problems. Values are performance measures. They are float
+            when normalize_runtime is True, int when the runtime measure is the
+            non-normalized number of function evaluations, and datetime when the
+            non-normalized walltime is used.
Returns:
- list: sorted switching points
+ A sorted array of switching points.
"""
switch_points = np.unique(solution_times.values)
@@ -205,3 +268,31 @@ def _find_switch_points(solution_times):
switch_points += 1e-10
switch_points = switch_points[np.isfinite(switch_points)]
return switch_points
+
+
+def _get_profile_plot_xlabel(runtime_measure: str, normalize_runtime: bool) -> str:
+ # The '{linebreak}' placeholder is replaced with the backend-specific line break
+ # in the corresponding plotting function.
+
+ if normalize_runtime:
+ runtime_measure_to_xlabel = {
+ "walltime": (
+ "Multiple of Minimal Wall Time{linebreak}Needed to Solve the Problem"
+ ),
+ "n_evaluations": (
+ "Multiple of Minimal Number of Function Evaluations"
+ "{linebreak}Needed to Solve the Problem"
+ ),
+ "n_batches": (
+ "Multiple of Minimal Number of Batches"
+ "{linebreak}Needed to Solve the Problem"
+ ),
+ }
+ else:
+ runtime_measure_to_xlabel = {
+ "walltime": "Wall Time Needed to Solve the Problem",
+ "n_evaluations": "Number of Function Evaluations",
+ "n_batches": "Number of Batches",
+ }
+
+ return runtime_measure_to_xlabel[runtime_measure]
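A hedged end-to-end sketch of the refactored `profile_plot`, showing the new `backend` argument next to the plotly default. The benchmark setup mirrors the benchmarking how-to; the `"example"` problem set and the optimizer names are assumptions and may need adjusting to the locally installed optimizers.

```python
from optimagic import get_benchmark_problems
from optimagic.benchmarking.run_benchmark import run_benchmark
from optimagic.visualization.profile_plot import profile_plot

problems = get_benchmark_problems("example")
results = run_benchmark(problems, ["scipy_lbfgsb", "scipy_neldermead"])

# Default: a plotly figure with the horizontal reference line at y = 1.
fig = profile_plot(problems=problems, results=results)

# New: a matplotlib Axes, here with normalized runtimes (data profiles).
ax = profile_plot(
    problems=problems,
    results=results,
    runtime_measure="n_evaluations",
    normalize_runtime=True,
    backend="matplotlib",
)
```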
diff --git a/tests/optimagic/visualization/test_profile_plot.py b/tests/optimagic/visualization/test_profile_plot.py
index 30f84a9b4..c6ed62d55 100644
--- a/tests/optimagic/visualization/test_profile_plot.py
+++ b/tests/optimagic/visualization/test_profile_plot.py
@@ -1,11 +1,15 @@
+import itertools
+
import numpy as np
import pandas as pd
import pytest
+from numpy.testing import assert_allclose
from optimagic import get_benchmark_problems
from optimagic.benchmarking.run_benchmark import run_benchmark
from optimagic.visualization.profile_plot import (
_determine_alpha_grid,
+ _extract_profile_plot_lines,
_find_switch_points,
create_solution_times,
profile_plot,
@@ -134,11 +138,49 @@ def test_create_solution_times_walltime():
pd.testing.assert_frame_equal(res, expected)
+def test_extract_profile_plot_lines():
+ solution_times = pd.DataFrame(
+ {
+ "algo1": [1.0, 5],
+ "algo2": [3.0, np.inf],
+ },
+ index=["prob1", "prob2"],
+ )
+ solution_times.columns.name = "algorithm"
+
+ info = pd.DataFrame(
+ {
+ "algo1": [True, True],
+ "algo2": [True, False],
+ },
+ index=["prob1", "prob2"],
+ )
+
+ palette_cycle = itertools.cycle(["red", "green", "blue"])
+ lines = _extract_profile_plot_lines(
+ solution_times=solution_times,
+ normalize_runtime=False,
+ converged_info=info,
+ palette_cycle=palette_cycle,
+ )
+
+ assert isinstance(lines, list) and len(lines) == 2
+
+ assert_allclose(lines[0].x, np.array([1.0, 2.0, 3.0, 4.0, 5.0, 5.125, 5.25]))
+ assert_allclose(lines[0].y, np.array([0.5, 0.5, 0.5, 0.5, 1.0, 1.0, 1.0]))
+ assert lines[0].name == "algo1"
+
+ assert_allclose(lines[1].x, np.array([1.0, 2.0, 3.0, 4.0, 5.0, 5.125, 5.25]))
+ assert_allclose(lines[1].y, np.array([0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5]))
+ assert lines[1].name == "algo2"
+
+
# integration test to make sure non default argument do not throw Errors
profile_options = [
{"runtime_measure": "walltime"},
{"runtime_measure": "n_batches"},
{"stopping_criterion": "x_or_y"},
+ {"backend": "matplotlib"},
]
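For context, a sketch of the parametrized integration test that presumably consumes `profile_options`, so the new `{"backend": "matplotlib"}` entry is exercised; the `benchmark_example` fixture name is hypothetical and stands in for whatever (problems, results) pair the existing test builds.

```python
@pytest.mark.parametrize("options", profile_options)
def test_profile_plot_with_options(benchmark_example, options):
    # benchmark_example: hypothetical fixture providing (problems, results).
    problems, results = benchmark_example
    profile_plot(problems=problems, results=results, **options)
```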