Merge pull request #2586 from AI-Hypercomputer:multislice-rl

Google-ML-Automation · Google-ML-Automation · commit d29bb51868e0 · 2025-11-04T10:56:46.000-08:00
PiperOrigin-RevId: 828047018
diff --git a/src/MaxText/configs/base.yml b/src/MaxText/configs/base.yml
@@ -471,7 +471,8 @@ ici_expert_parallelism: 1
 # Enable ZeRO-1 optimizer sharding over data axis
 shard_optimizer_over_data: False
 
-# The number of TPU slices is automatically determined, you should not set this explicitly. For ahead of time compilation,
+# Unless explicitly specified, the number of TPU slices is automatically determined. It should only be set for
+# disaggregated reinforcement learning workloads using multiple slices. For ahead of time compilation,
 # you should set compile_topology_num_slices, which will in turn set this value. For non-TPU environments this is set to 1.
 num_slices: -1
 
diff --git a/src/MaxText/max_utils.py b/src/MaxText/max_utils.py
@@ -289,6 +289,9 @@ def _retrieve_jax_init_info(raw_keys):
 
 def get_num_slices(raw_keys):
   """Calculate num_slices based on number of devices."""
+  if raw_keys["num_slices"] != -1:
+    max_logging.log(f"Using num_slices={raw_keys['num_slices']} per user request.")
+    return raw_keys["num_slices"]
   if raw_keys["hardware"] == "cpu":
     max_logging.log(" Setting num_slices=1 for CPU hardware type")
     return 1