forked from spack/spack
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
PyTorch: add support for the UCC distributed backend (spack#45598)
- Loading branch information
1 parent
ca9a7b2
commit 60c589d
Showing
2 changed files
with
18 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,7 +31,7 @@ class Gloo(CMakePackage, CudaPackage): | |
version("2018-05-29", commit="69eef748cc1dfbe0fefed69b34e6545495f67ac5") # py-torch@0.4.1 | ||
version("2018-04-06", commit="aad0002fb40612e991390d8e807f247ed23f13c5") # py-torch@:0.4.0 | ||
|
||
depends_on("cxx", type="build") # generated | ||
variant("libuv", default=False, description="Build libuv transport") | ||
|
||
# Gloo does not build on Linux >=6.0.3 (fixed in master) | ||
# See: https://github.com/facebookincubator/gloo/issues/345 | ||
|
@@ -48,7 +48,13 @@ class Gloo(CMakePackage, CudaPackage): | |
) | ||
|
||
generator("ninja") | ||
depends_on("cxx", type="build") | ||
depends_on("pkgconfig", type="build") | ||
depends_on("libuv@1.26:", when="+libuv") | ||
depends_on("cmake@2.8.12:", type="build") | ||
|
||
def cmake_args(self): | ||
return [self.define_from_variant("USE_CUDA", "cuda")] | ||
return [ | ||
self.define_from_variant("USE_CUDA", "cuda"), | ||
self.define_from_variant("USE_LIBUV", "libuv"), | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -91,15 +91,11 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): | |
variant("valgrind", default=True, description="Use Valgrind", when="@1.8: platform=linux") | ||
variant("xnnpack", default=True, description="Use XNNPACK", when="@1.5:") | ||
variant("mkldnn", default=True, description="Use MKLDNN") | ||
variant("distributed", default=not is_darwin, description="Use distributed") | ||
variant("mpi", default=not is_darwin, description="Use MPI for Caffe2", when="+distributed") | ||
variant("gloo", default=not is_darwin, description="Use Gloo", when="+distributed") | ||
variant( | ||
"tensorpipe", | ||
default=not is_darwin, | ||
description="Use TensorPipe", | ||
when="@1.6: +distributed", | ||
) | ||
variant("distributed", default=True, description="Use distributed") | ||
variant("mpi", default=True, description="Use MPI for Caffe2", when="+distributed") | ||
variant("ucc", default=False, description="Use UCC", when="@1.13: +distributed") | ||
variant("gloo", default=True, description="Use Gloo", when="+distributed") | ||
variant("tensorpipe", default=True, description="Use TensorPipe", when="@1.6: +distributed") | ||
variant("onnx_ml", default=True, description="Enable traditional ONNX ML API", when="@1.5:") | ||
variant( | ||
"breakpad", | ||
|
@@ -205,6 +201,7 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): | |
depends_on("gloo@2020-09-18", when="@1.7:1.8+gloo") | ||
depends_on("gloo@2020-03-17", when="@1.6+gloo") | ||
depends_on("gloo+cuda", when="@1.6:+gloo+cuda") | ||
depends_on("gloo+libuv", when="@1.6: platform=darwin") | ||
depends_on("nccl", when="+nccl+cuda") | ||
# https://github.com/pytorch/pytorch/issues/60331 | ||
# depends_on("onnx@1.16.0", when="@2.3:+onnx_ml") | ||
|
@@ -278,6 +275,8 @@ class PyTorch(PythonPackage, CudaPackage, ROCmPackage): | |
depends_on("miopen-hip") | ||
depends_on("rocminfo") | ||
depends_on("mpi", when="+mpi") | ||
depends_on("ucc", when="+ucc") | ||
depends_on("ucx", when="+ucc") | ||
depends_on("mkl", when="+mkldnn") | ||
|
||
# Test dependencies | ||
|
@@ -591,6 +590,7 @@ def enable_or_disable(variant, keyword="USE", var=None): | |
enable_or_disable("mkldnn") | ||
enable_or_disable("distributed") | ||
enable_or_disable("mpi") | ||
enable_or_disable("ucc") | ||
# cmake/Modules/FindGloo.cmake | ||
enable_or_disable("gloo") | ||
enable_or_disable("tensorpipe") | ||
|
@@ -661,11 +661,9 @@ def enable_or_disable(variant, keyword="USE", var=None): | |
env.set("USE_SYSTEM_PTHREADPOOL", "ON") | ||
env.set("USE_SYSTEM_PYBIND11", "ON") | ||
env.set("USE_SYSTEM_SLEEF", "ON") | ||
# env.set("USE_SYSTEM_TBB", "ON") | ||
# env.set("USE_SYSTEM_UCC", "ON") | ||
env.set("USE_SYSTEM_UCC", "ON") | ||
# https://github.com/pytorch/pytorch/issues/60332 | ||
# env.set("USE_SYSTEM_XNNPACK", "ON") | ||
# env.set("USE_SYSTEM_ZSTD", "ON") | ||
|
||
if self.spec.satisfies("+custom-protobuf"): | ||
env.set("BUILD_CUSTOM_PROTOBUF", "ON") | ||
|