Adjust rmm pool handling in PDSH benchmarks (#20138)

TomAugspurger · web-flow · commit c2283f7b0d80 · 2025-10-03T17:06:14.000Z
This updates the handling of the RMM pool argument for the PDSH CLI. Previously, `--rmm-pool-size` was used for both sync and async RMM memory resources. As described in rapidsai/rmm#2060, the meaning of the pool size is different for these two types of memory resources, and rapidsai/dask-cuda#1563 is deprecating passing a pool size to `dask_cuda.LocalCUDACluster` with `rmm_async=True`.

There is no real difference in the timings (maybe a small improvement, but more likely just noise):

```
# New default
python python/cudf_polars/cudf_polars/experimental/benchmarks/pdsh.py --path /datasets/toaugspurger/tpch/scale-100/ --no-print-results --executor streaming --scheduler distributed --iterations 3 --rmm-async 1,2,3,4,5
# Total mean time across all queries: 45.6239 seconds

# Previous default
python python/cudf_polars/cudf_polars/experimental/benchmarks/pdsh.py --path /datasets/toaugspurger/tpch/scale-100/ --no-print-results --executor streaming --scheduler distributed --iterations 3 --rmm-async --rmm-pool-size 0.5 1,2,3,4,5
# Total mean time across all queries: 47.8799 seconds
```

Authors:
- Tom Augspurger (https://github.com/TomAugspurger)

Approvers:
- Richard (Rick) Zamora (https://github.com/rjzamora)
- Bradley Dice (https://github.com/bdice)

URL: #20138
diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py
@@ -669,11 +669,12 @@ def parse_args(
     )
     parser.add_argument(
         "--rmm-pool-size",
-        default=0.5,
+        default=None,
         type=float,
         help=textwrap.dedent("""\
             Fraction of total GPU memory to allocate for RMM pool.
-            Default: 0.5 (50%% of GPU memory)"""),
+            Default: 0.5 (50%% of GPU memory) when --no-rmm-async,
+                     None when --rmm-async"""),
     )
     parser.add_argument(
         "--rmm-release-threshold",
@@ -777,7 +778,14 @@ def parse_args(
         default=False,
         help="Enable statistics planning.",
     )
-    return parser.parse_args(args)
+
+    parsed_args = parser.parse_args(args)
+
+    if parsed_args.rmm_pool_size is None and not parsed_args.rmm_async:
+        # The default rmm pool size depends on the rmm_async flag
+        parsed_args.rmm_pool_size = 0.5
+
+    return parsed_args
 
 
 def run_polars(