@@ -395,8 +395,8 @@ def can_submit_new_task(allocator, op):
         # 50% of the global limits are shared.
         assert allocator._total_shared == ExecutionResources(8, 0, 500)
         # Test budgets.
-        assert allocator._op_budgets[o2] == ExecutionResources(8, float("inf"), 375)
-        assert allocator._op_budgets[o3] == ExecutionResources(8, float("inf"), 375)
+        assert allocator._op_budgets[o2] == ExecutionResources(8, 0, 375)
+        assert allocator._op_budgets[o3] == ExecutionResources(8, 0, 375)
         # Test can_submit_new_task and max_task_output_bytes_to_read.
         assert can_submit_new_task(allocator, o2)
         assert can_submit_new_task(allocator, o3)
@@ -425,9 +425,9 @@ def can_submit_new_task(allocator, op):
         # remaining shared = 1000/2 - 275 = 225
         # Test budgets.
         # memory_budget[o2] = 0 + 225/2 = 112.5
-        assert allocator._op_budgets[o2] == ExecutionResources(3, float("inf"), 112.5)
+        assert allocator._op_budgets[o2] == ExecutionResources(3, 0, 112.5)
         # memory_budget[o3] = 95 + 225/2 = 207.5
-        assert allocator._op_budgets[o3] == ExecutionResources(5, float("inf"), 207.5)
+        assert allocator._op_budgets[o3] == ExecutionResources(5, 0, 207.5)
         # Test can_submit_new_task and max_task_output_bytes_to_read.
         assert can_submit_new_task(allocator, o2)
         assert can_submit_new_task(allocator, o3)
@@ -461,9 +461,9 @@ def can_submit_new_task(allocator, op):

         # Test budgets.
         # memory_budget[o2] = 0 + 100/2 = 50
-        assert allocator._op_budgets[o2] == ExecutionResources(1.5, float("inf"), 50)
+        assert allocator._op_budgets[o2] == ExecutionResources(1.5, 0, 50)
         # memory_budget[o3] = 70 + 100/2 = 120
-        assert allocator._op_budgets[o3] == ExecutionResources(2.5, float("inf"), 120)
+        assert allocator._op_budgets[o3] == ExecutionResources(2.5, 0, 120)
         # Test can_submit_new_task and max_task_output_bytes_to_read.
         assert can_submit_new_task(allocator, o2)
         assert can_submit_new_task(allocator, o3)
@@ -624,6 +624,93 @@ def test_only_handle_eligible_ops(self, restore_data_context):
         allocator.update_usages()
         assert o2 not in allocator._op_budgets

+    def test_gpu_allocation(self, restore_data_context):
+        """Test GPU allocation for GPU vs non-GPU operators."""
+        DataContext.get_current().op_resource_reservation_enabled = True
+        DataContext.get_current().op_resource_reservation_ratio = 0.5
+
+        o1 = InputDataBuffer(DataContext.get_current(), [])
+
+        # Non-GPU operator
+        o2 = mock_map_op(o1)
+        o2.min_max_resource_requirements = MagicMock(
+            return_value=(ExecutionResources(0, 0, 0), ExecutionResources(0, 0, 0))
+        )
+
+        # GPU operator
+        o3 = mock_map_op(o2, ray_remote_args={"num_gpus": 1})
+        o3.min_max_resource_requirements = MagicMock(
+            return_value=(ExecutionResources(0, 1, 0), ExecutionResources(0, 1, 0))
+        )
+
+        topo, _ = build_streaming_topology(o3, ExecutionOptions())
+
+        global_limits = ExecutionResources(gpu=4)
+        op_usages = {
+            o1: ExecutionResources.zero(),
+            o2: ExecutionResources.zero(),
+            o3: ExecutionResources(gpu=1),  # GPU op using 1 GPU
+        }
+
+        resource_manager = ResourceManager(
+            topo, ExecutionOptions(), MagicMock(), DataContext.get_current()
+        )
+        resource_manager.get_op_usage = MagicMock(side_effect=lambda op: op_usages[op])
+        resource_manager._mem_op_internal = dict.fromkeys([o1, o2, o3], 0)
+        resource_manager._mem_op_outputs = dict.fromkeys([o1, o2, o3], 0)
+        resource_manager.get_global_limits = MagicMock(return_value=global_limits)
+
+        allocator = resource_manager._op_resource_allocator
+        allocator.update_usages()
+
+        # Non-GPU operator should get 0 GPU
+        assert allocator._op_budgets[o2].gpu == 0
+
+        # GPU operator should get remaining GPUs (4 total - 1 used = 3 available)
+        assert allocator._op_budgets[o3].gpu == 3
+
+    def test_multiple_gpu_operators(self, restore_data_context):
+        """Test GPU allocation for multiple GPU operators."""
+        DataContext.get_current().op_resource_reservation_enabled = True
+        DataContext.get_current().op_resource_reservation_ratio = 0.5
+
+        o1 = InputDataBuffer(DataContext.get_current(), [])
+
+        # Two GPU operators
+        o2 = mock_map_op(o1, ray_remote_args={"num_gpus": 1})
+        o2.min_max_resource_requirements = MagicMock(
+            return_value=(ExecutionResources(0, 1, 0), ExecutionResources(0, 1, 0))
+        )
+
+        o3 = mock_map_op(o2, ray_remote_args={"num_gpus": 1})
+        o3.min_max_resource_requirements = MagicMock(
+            return_value=(ExecutionResources(0, 1, 0), ExecutionResources(0, 1, 0))
+        )
+
+        topo, _ = build_streaming_topology(o3, ExecutionOptions())
+
+        global_limits = ExecutionResources(gpu=4)
+        op_usages = {
+            o1: ExecutionResources.zero(),
+            o2: ExecutionResources(gpu=1),  # Using 1 GPU
+            o3: ExecutionResources(gpu=0),  # Not using GPU yet
+        }
+
+        resource_manager = ResourceManager(
+            topo, ExecutionOptions(), MagicMock(), DataContext.get_current()
+        )
+        resource_manager.get_op_usage = MagicMock(side_effect=lambda op: op_usages[op])
+        resource_manager.get_global_limits = MagicMock(return_value=global_limits)
+
+        allocator = resource_manager._op_resource_allocator
+        allocator.update_usages()
+
+        # o2: 4 total - 1 used = 3 available
+        assert allocator._op_budgets[o2].gpu == 3
+
+        # o3: 4 total - 0 used = 4 available
+        assert allocator._op_budgets[o3].gpu == 4
+

 if __name__ == "__main__":
     import sys
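
For context on what the two new tests pin down, here is a minimal sketch of the GPU-budget rule they assert: operators that request no GPUs are budgeted zero GPUs, while a GPU operator's budget is the global GPU limit minus that operator's own current GPU usage. The helper `expected_gpu_budget` below is hypothetical, written only to restate the arithmetic in the test comments; it is not the allocator's actual API.

def expected_gpu_budget(requests_gpu: bool, global_gpu_limit: float, op_gpu_usage: float) -> float:
    # Hypothetical restatement of the budgets asserted in the new tests:
    # non-GPU operators never receive a GPU budget; a GPU operator may use
    # whatever GPUs it is not already holding, up to the global limit.
    if not requests_gpu:
        return 0.0
    return global_gpu_limit - op_gpu_usage


# test_gpu_allocation: 4-GPU limit, o3 already uses 1 GPU -> budget 3; o2 is non-GPU -> 0.
assert expected_gpu_budget(True, 4, 1) == 3
assert expected_gpu_budget(False, 4, 0) == 0

# test_multiple_gpu_operators: o2 uses 1 GPU -> budget 3; o3 uses 0 -> budget 4.
assert expected_gpu_budget(True, 4, 1) == 3
assert expected_gpu_budget(True, 4, 0) == 4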