|
2 | 2 | import gc |
3 | 3 | import logging |
4 | 4 | import sys |
| 5 | +import time |
5 | 6 | import weakref |
| 7 | +from unittest.mock import Mock |
6 | 8 |
|
7 | 9 | import numpy as np |
8 | 10 | import pytest |
9 | 11 |
|
10 | 12 | import ray |
11 | 13 | import ray.cluster_utils |
12 | 14 | from ray._common.test_utils import wait_for_condition |
| 15 | +from ray._private.gc_collect_manager import PythonGCThread |
13 | 16 | from ray._private.internal_api import global_gc |
14 | 17 |
|
15 | 18 | logger = logging.getLogger(__name__) |
@@ -216,5 +219,136 @@ def f(self): |
216 | 219 | gc.enable() |
217 | 220 |
|
218 | 221 |
|
def test_local_gc_called_once_per_interval(shutdown_only):
    """Exercise periodic local GC on the driver and on two actor workers.

    Automatic Python GC is disabled on the driver and inside each actor, so
    the cyclic objects created here can only be reclaimed by an explicit
    collection. The test runs three rounds:

    1. A first batch of cyclic garbage must eventually disappear.
    2. A second batch, created right after, must still be present after
       2 seconds of polling.
    3. The same second batch must disappear within 10 more seconds.

    NOTE(review): ``local_gc_min_interval_s`` is 0 in the config below, so the
    throttling that round 2 relies on presumably comes from a minimum interval
    enforced elsewhere (e.g. by the Python GC thread) -- confirm.
    """
    ray.init(
        num_cpus=2,
        _system_config={
            "local_gc_interval_s": 1,
            "local_gc_min_interval_s": 0,
            "global_gc_min_interval_s": 0,
        },
    )

    class ObjectWithCyclicRef:
        """Object that references itself, so refcounting alone never frees it."""

        def __init__(self):
            self.loop = self

    @ray.remote(num_cpus=1)
    class GarbageHolder:
        """Actor that creates cyclic garbage and reports whether it still exists."""

        def __init__(self):
            # Turn off automatic GC in the actor's worker process.
            gc.disable()
            self.garbage = None

        def make_garbage(self):
            cyclic = ObjectWithCyclicRef()
            # Keep only a weak reference; the cycle keeps the object alive
            # until a collection runs.
            self.garbage = weakref.ref(cyclic)
            return True

        def has_garbage(self):
            return self.garbage() is not None

    def remote_garbage_flags():
        # One boolean per actor: True while that actor's garbage is alive.
        return ray.get([holder.has_garbage.remote() for holder in holders])

    def garbage_is_gone():
        # `driver_ref` and `holders` are looked up in the enclosing scope at
        # call time, so this always checks the current round's garbage.
        if driver_ref() is not None:
            return False
        return not any(remote_garbage_flags())

    try:
        gc.disable()

        # Round 1: first batch of garbage should be collected.
        # Driver-side garbage first, then the remote workers.
        driver_ref = weakref.ref(ObjectWithCyclicRef())
        holders = [GarbageHolder.remote() for _ in range(2)]
        ray.get([holder.make_garbage.remote() for holder in holders])

        assert driver_ref() is not None
        assert all(remote_garbage_flags())

        wait_for_condition(garbage_is_gone)

        # Round 2: second batch should NOT be collected within min_interval.
        driver_ref = weakref.ref(ObjectWithCyclicRef())
        ray.get([holder.make_garbage.remote() for holder in holders])

        with pytest.raises(RuntimeError):
            wait_for_condition(
                garbage_is_gone,
                timeout=2.0,  # shorter than min_interval
                retry_interval_ms=50,
            )

        # Round 3: after min_interval passes, garbage should be collected.
        wait_for_condition(
            garbage_is_gone,
            timeout=10.0,
            retry_interval_ms=50,
        )
    finally:
        # Restore automatic GC on the driver whatever the outcome.
        gc.enable()
| 292 | + |
| 293 | + |
def test_gc_manager_thread_basic_functionality():
    """Start a PythonGCThread, trigger it once, and expect one collect call.

    The collect function is replaced by a mock so no real garbage collection
    runs. The thread must report alive after ``start()`` and not alive after
    ``stop()``.
    """
    fake_collect = Mock(return_value=10)
    worker = PythonGCThread(min_interval_s=1, gc_collect_func=fake_collect)

    def collected_once():
        return fake_collect.call_count == 1

    try:
        worker.start()
        assert worker.is_alive()

        worker.trigger_gc()

        # The collect call happens on the background thread, so poll for it.
        wait_for_condition(collected_once, timeout=2)
        fake_collect.assert_called_once()
    finally:
        worker.stop()
        assert not worker.is_alive()
| 312 | + |
| 313 | + |
def test_gc_manager_thread_min_interval_throttling():
    """Trigger GC three times, one second apart, with ``min_interval_s=2``.

    The mocked collect function is expected to have been called exactly twice
    once the triggers are done (polled for up to 2 seconds).

    NOTE(review): presumably the second trigger falls inside the 2-second
    minimum interval and is throttled -- confirm against PythonGCThread.
    """
    fake_collect = Mock(return_value=5)
    worker = PythonGCThread(min_interval_s=2, gc_collect_func=fake_collect)

    def collected_twice():
        return fake_collect.call_count == 2

    try:
        worker.start()

        for _attempt in range(3):
            worker.trigger_gc()
            time.sleep(1)

        wait_for_condition(collected_twice, timeout=2)
        assert fake_collect.call_count == 2
    finally:
        worker.stop()
| 332 | + |
| 333 | + |
def test_gc_manager_thread_exception_handling():
    """Check the GC thread stays alive when the collect function raises.

    The mocked collect function raises ``RuntimeError("GC failed")`` on every
    call. After three triggers spaced 0.1 seconds apart, with
    ``min_interval_s=5``, the thread must still be alive and the collect
    function must have been called exactly once.
    """
    failing_collect = Mock(side_effect=RuntimeError("GC failed"))
    worker = PythonGCThread(min_interval_s=5, gc_collect_func=failing_collect)

    try:
        worker.start()

        for _attempt in range(3):
            worker.trigger_gc()
            time.sleep(0.1)

        assert worker.is_alive()
        failing_collect.assert_called_once()
    finally:
        worker.stop()
| 351 | + |
| 352 | + |
if __name__ == "__main__":
    # Run this file's tests with pytest (-s: no output capture, -v: verbose)
    # and use pytest's return value as the process exit status.
    exit_code = pytest.main(["-sv", __file__])
    sys.exit(exit_code)
0 commit comments