Skip to content

Commit

Permalink
Test maximum pool size with RMM async
Browse files Browse the repository at this point in the history
  • Loading branch information
pentschev committed Aug 21, 2023
1 parent eefd937 commit 602dd5a
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 0 deletions.
50 changes: 50 additions & 0 deletions dask_cuda/tests/test_dask_cuda_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,56 @@ def test_rmm_async(loop): # noqa: F811
assert ret["[plugin] RMMSetup"]["release_threshold"] == 3000000000


def test_rmm_async_with_maximum_pool_size(loop):  # noqa: F811
    """Check that ``--rmm-maximum-pool-size`` wraps the async pool correctly.

    Launches a scheduler and a dask-cuda worker via the CLI with
    ``--rmm-async`` plus pool/release/maximum sizes, then verifies that
    every worker's current device resource is a ``LimitingResourceAdaptor``
    over a ``CudaAsyncMemoryResource``, and that the RMMSetup plugin reports
    the configured byte values.
    """
    rmm = pytest.importorskip("rmm")

    # cudaMallocAsync needs CUDA driver AND runtime >= 11.2 (11020).
    if (
        rmm._cuda.gpu.driverGetVersion() < 11020
        or rmm._cuda.gpu.runtimeGetVersion() < 11020
    ):
        pytest.skip("cudaMallocAsync not supported")

    scheduler_cmd = ["dask", "scheduler", "--port", "9369", "--no-dashboard"]
    worker_cmd = [
        "dask",
        "cuda",
        "worker",
        "127.0.0.1:9369",
        "--host",
        "127.0.0.1",
        "--rmm-async",
        "--rmm-pool-size",
        "2 GB",
        "--rmm-release-threshold",
        "3 GB",
        "--rmm-maximum-pool-size",
        "4 GB",
        "--no-dashboard",
    ]
    # Scheduler is entered first, torn down last.
    with popen(scheduler_cmd), popen(worker_cmd):
        with Client("127.0.0.1:9369", loop=loop) as client:
            assert wait_workers(client, n_gpus=get_n_gpus())

            # Query each worker for its resource type and the type of the
            # resource it wraps (the upstream of the limiting adaptor).
            mr_types = client.run(
                lambda: (
                    rmm.mr.get_current_device_resource_type(),
                    type(rmm.mr.get_current_device_resource().get_upstream()),
                )
            )
            for mr_type, upstream_type in mr_types.values():
                assert mr_type is rmm.mr.LimitingResourceAdaptor
                assert upstream_type is rmm.mr.CudaAsyncMemoryResource

            # Plugin configuration must echo the CLI sizes, in bytes.
            ret = get_cluster_configuration(client)
            wait(ret)
            assert ret["[plugin] RMMSetup"]["initial_pool_size"] == 2000000000
            assert ret["[plugin] RMMSetup"]["release_threshold"] == 3000000000
            assert ret["[plugin] RMMSetup"]["maximum_pool_size"] == 4000000000


def test_rmm_logging(loop): # noqa: F811
rmm = pytest.importorskip("rmm")
with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
Expand Down
34 changes: 34 additions & 0 deletions dask_cuda/tests/test_local_cuda_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,40 @@ async def test_rmm_async():
assert ret["[plugin] RMMSetup"]["release_threshold"] == 3000000000


@gen_test(timeout=20)
async def test_rmm_async_with_maximum_pool_size():
    """Check ``rmm_maximum_pool_size`` with ``rmm_async`` on LocalCUDACluster.

    Starts an async LocalCUDACluster configured with the async allocator and
    pool/release/maximum sizes, then verifies each worker uses a
    ``LimitingResourceAdaptor`` wrapping a ``CudaAsyncMemoryResource``, and
    that the RMMSetup plugin reports the configured byte values.
    """
    rmm = pytest.importorskip("rmm")

    # cudaMallocAsync needs CUDA driver AND runtime >= 11.2 (11020).
    if (
        rmm._cuda.gpu.driverGetVersion() < 11020
        or rmm._cuda.gpu.runtimeGetVersion() < 11020
    ):
        pytest.skip("cudaMallocAsync not supported")

    async with LocalCUDACluster(
        rmm_async=True,
        rmm_pool_size="2GB",
        rmm_release_threshold="3GB",
        rmm_maximum_pool_size="4GB",
        asynchronous=True,
    ) as cluster:
        async with Client(cluster, asynchronous=True) as client:
            # Query each worker for its resource type and the type of the
            # resource it wraps (the upstream of the limiting adaptor).
            mr_types = await client.run(
                lambda: (
                    rmm.mr.get_current_device_resource_type(),
                    type(rmm.mr.get_current_device_resource().get_upstream()),
                )
            )
            for mr_type, upstream_type in mr_types.values():
                assert mr_type is rmm.mr.LimitingResourceAdaptor
                assert upstream_type is rmm.mr.CudaAsyncMemoryResource

            # Plugin configuration must echo the constructor sizes, in bytes.
            ret = await get_cluster_configuration(client)
            assert ret["[plugin] RMMSetup"]["initial_pool_size"] == 2000000000
            assert ret["[plugin] RMMSetup"]["release_threshold"] == 3000000000
            assert ret["[plugin] RMMSetup"]["maximum_pool_size"] == 4000000000


@gen_test(timeout=20)
async def test_rmm_logging():
rmm = pytest.importorskip("rmm")
Expand Down

0 comments on commit 602dd5a

Please sign in to comment.