Skip to content

Commit

Permalink
Skip TCP-only DGX tests with UCX 1.16 (#1331)
Browse files Browse the repository at this point in the history
Wireup may fail in UCX 1.16 on nodes with multiple NICs when TCP is used, so those tests are skipped. UCX 1.17 will resolve the issue; alternatively, `UCX_PROTO_ENABLE=n` may be used as a workaround in UCX 1.16.

Authors:
  - Peter Andreas Entschev (https://github.com/pentschev)

Approvers:
  - Richard (Rick) Zamora (https://github.com/rjzamora)

URL: #1331
  • Loading branch information
pentschev authored Apr 8, 2024
1 parent 58e4b95 commit 7e03a52
Showing 1 changed file with 12 additions and 4 deletions.
16 changes: 12 additions & 4 deletions dask_cuda/tests/test_dgx.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
psutil = pytest.importorskip("psutil")


def _is_ucx_116(ucp):
return ucp.get_ucx_version()[:2] == (1, 16)


class DGXVersion(Enum):
DGX_1 = auto()
DGX_2 = auto()
Expand Down Expand Up @@ -102,9 +106,11 @@ def check_ucx_options():
)
def test_tcp_over_ucx(protocol):
if protocol == "ucx":
pytest.importorskip("ucp")
ucp = pytest.importorskip("ucp")
elif protocol == "ucxx":
pytest.importorskip("ucxx")
ucp = pytest.importorskip("ucxx")
if _is_ucx_116(ucp):
pytest.skip("https://github.com/rapidsai/ucx-py/issues/1037")

p = mp.Process(target=_test_tcp_over_ucx, args=(protocol,))
p.start()
Expand Down Expand Up @@ -217,9 +223,11 @@ def check_ucx_options():
)
def test_ucx_infiniband_nvlink(protocol, params):
if protocol == "ucx":
pytest.importorskip("ucp")
ucp = pytest.importorskip("ucp")
elif protocol == "ucxx":
pytest.importorskip("ucxx")
ucp = pytest.importorskip("ucxx")
if _is_ucx_116(ucp) and params["enable_infiniband"] is False:
pytest.skip("https://github.com/rapidsai/ucx-py/issues/1037")

skip_queue = mp.Queue()

Expand Down

0 comments on commit 7e03a52

Please sign in to comment.