diff --git a/clusterscope/cluster_info.py b/clusterscope/cluster_info.py
index 16ec8e3..54eb65d 100644
--- a/clusterscope/cluster_info.py
+++ b/clusterscope/cluster_info.py
@@ -297,7 +297,7 @@ def get_task_resource_requirements(
 
         if gpus_per_task == 0:
             ram_mb_per_cpu = total_ram_per_node / total_cpus_per_node
-            total_required_ram_mb = math.ceil(
+            total_required_ram_mb = math.floor(
                 ram_mb_per_cpu * cpus_per_task * tasks_per_node
             )
         # GPU Request
@@ -305,19 +305,18 @@ def get_task_resource_requirements(
             total_gpus_per_node = self.get_total_gpus_per_node()
 
             cpu_cores_per_gpu = total_cpus_per_node / total_gpus_per_node
-            total_required_cpu_cores_per_task = math.ceil(
+            total_required_cpu_cores_per_task = math.floor(
                 cpu_cores_per_gpu * gpus_per_task
             )
 
             ram_mb_per_gpu = total_ram_per_node / total_gpus_per_node
-            total_required_ram_mb = math.ceil(
+            total_required_ram_mb = math.floor(
                 ram_mb_per_gpu * gpus_per_task * tasks_per_node
             )
 
         cpu_cores_per_task = total_required_cpu_cores_per_task / tasks_per_node
 
-        # CPU cores per task: Round up to ensure we don't under-allocate
-        cpus_per_task = math.ceil(cpu_cores_per_task)
+        cpus_per_task = math.floor(cpu_cores_per_task)
 
         # Memory per node: Convert MB to GB and format for Slurm
         # Note: Memory is allocated per node, not per task in most Slurm configurations
@@ -366,34 +365,24 @@ def get_array_job_requirements(
 
         if gpus_per_task == max_gpus_per_node:
             # For max GPUs, use all available resources
-            required_cpu_cores = total_cpu_cores
-            required_ram_mb = total_ram_mb
+            required_cpu_cores = math.floor(total_cpu_cores)
+            required_ram_mb = math.floor(total_ram_mb)
         else:
             # Calculate per-GPU allocation based on actual GPU count per node
             cpu_cores_per_gpu = total_cpu_cores / max_gpus_per_node
             ram_mb_per_gpu = total_ram_mb / max_gpus_per_node
 
             # Calculate requirements per array element
-            required_cpu_cores = math.ceil(cpu_cores_per_gpu * gpus_per_task)
-            required_ram_mb = math.ceil(ram_mb_per_gpu * gpus_per_task)
-
-        # Convert to Slurm SBATCH format
-        # CPU cores: Round up to ensure we don't under-allocate
-        sbatch_cpu_cores = math.ceil(required_cpu_cores)
+            required_cpu_cores = math.floor(cpu_cores_per_gpu * gpus_per_task)
+            required_ram_mb = math.floor(ram_mb_per_gpu * gpus_per_task)
 
         # Memory: Convert MB to GB and format for Slurm
         required_ram_gb = required_ram_mb / 1024
-        if required_ram_gb >= 1024:
-            # Use TB format for very large memory
-            sbatch_memory = f"{required_ram_gb / 1024:.0f}T"
-        else:
-            # Use GB format (most common)
-            sbatch_memory = f"{required_ram_gb:.0f}G"
+        sbatch_memory = f"{required_ram_gb:.0f}G"
 
-        # Array jobs always have 1 task per array element
         return ResourceShape(
             slurm_partition=partition,
-            cpus_per_task=sbatch_cpu_cores,
+            cpus_per_task=required_cpu_cores,
             memory=sbatch_memory,
             tasks_per_node=1,
             gpus_per_task=gpus_per_task,
diff --git a/tests/test_cluster_info.py b/tests/test_cluster_info.py
index c440739..1cbb81f 100644
--- a/tests/test_cluster_info.py
+++ b/tests/test_cluster_info.py
@@ -888,8 +888,8 @@ def test_get_task_resource_requirements_cpu_rounding_up(
             partition="test_partition", gpus_per_task=1
         )
 
-        # 191/8 = 23.875, should round up to 24
-        self.assertEqual(result.cpus_per_task, 24)
+        # 191/8 = 23.875, should round down to 23
+        self.assertEqual(result.cpus_per_task, 23)
 
     @patch.object(UnifiedInfo, "get_total_gpus_per_node")
     def test_getResRequirements_invalid_tasks_per_node(self, mock_total_gpus):
@@ -956,9 +956,7 @@ def test_getArrayJobRequirements_full_node(
         )
 
         self.assertEqual(result.cpus_per_task, 192)  # All CPUs
-        self.assertEqual(
-            result.memory, "2T"
-        )  # All memory: 1843200/1024/1024 = 1.8TB rounds to 2T
+        self.assertEqual(result.memory, "1800G")  # All memory: 1843200/1024 = 1800GB
         self.assertEqual(result.tasks_per_node, 1)
 
     @patch.object(UnifiedInfo, "get_total_gpus_per_node")
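For reference (not part of the patch): a minimal standalone sketch of the rounding change the diff makes, using the 191-core / 8-GPU-per-node figures from the updated test. The variable names here are illustrative, not the library's internals.

import math

# Assumed node shape, taken from the test expectations above.
total_cpus_per_node = 191
total_gpus_per_node = 8
gpus_per_task = 1

cpu_cores_per_gpu = total_cpus_per_node / total_gpus_per_node  # 23.875

# Old behaviour: round up; at 24 cores per GPU, 8 GPUs would ask for 192 of 191 cores.
print(math.ceil(cpu_cores_per_gpu * gpus_per_task))   # 24
# New behaviour: round down, so per-GPU requests stay within the node's capacity.
print(math.floor(cpu_cores_per_gpu * gpus_per_task))  # 23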