-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Brendan Cunningham <[email protected]>
- Loading branch information
0 parents
commit 1bfac8f
Showing
7 changed files
with
1,119 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
.nfs* | ||
.*.sw? |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
# BEGIN_ICS_COPYRIGHT8 **************************************** | ||
# | ||
# Copyright (c) 2021, Cornelis Networks | ||
# | ||
# Redistribution and use in source and binary forms, with or without | ||
# modification, are permitted provided that the following conditions are met: | ||
# | ||
# * Redistributions of source code must retain the above copyright notice, | ||
# this list of conditions and the following disclaimer. | ||
# * Redistributions in binary form must reproduce the above copyright | ||
# notice, this list of conditions and the following disclaimer in the | ||
# documentation and/or other materials provided with the distribution. | ||
# * Neither the name of Cornelis Networks nor the names of its contributors | ||
# may be used to endorse or promote products derived from this software | ||
# without specific prior written permission. | ||
# | ||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE | ||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
# | ||
# END_ICS_COPYRIGHT8 **************************************** | ||
|
||
#[ICS VERSION STRING: unknown] | ||
|
||
# Directory searched for the PSM2 headers (passed to the compiler via -I in INCLUDES). | ||
PSM2_INCLUDE := /usr/include | ||
|
||
# psm2-nccl requires nccl_net.h, which is found in an NCCL development clone. | ||
# NCCL_SRC_DIR points at that clone's src directory. | ||
NCCL_SRC_DIR := ../nccl/src | ||
NCCL_INCLUDE := $(NCCL_SRC_DIR)/include | ||
CUDA_INCLUDE := /usr/local/cuda/include | ||
|
||
# BUILDDIR only controls where the shared object is written; each object file | ||
# is placed next to its source because LIBOBJS is derived from LIBSRC. | ||
BUILDDIR := . | ||
SONAME := $(BUILDDIR)/libnccl-net.so | ||
LIBSRC := src/psm2_nccl_net.c src/hfi_sysclass.c | ||
LIBOBJS := $(LIBSRC:.c=.o) | ||
|
||
INCLUDES := -Isrc/include -I$(PSM2_INCLUDE) -I$(NCCL_INCLUDE) -I$(CUDA_INCLUDE) | ||
# DEBUG=1 (the default) selects "-g -Og" below; any other value selects "-O2". | ||
DEBUG := 1 | ||
|
||
.PHONY : all clean | ||
.DEFAULT_GOAL := all | ||
|
||
# Flags always applied by this Makefile, in addition to any user-supplied CFLAGS. | ||
BASECFLAGS := -Wall | ||
ifeq ($(DEBUG),1) | ||
BASECFLAGS += -g -Og | ||
else | ||
BASECFLAGS += -O2 | ||
endif | ||
|
||
# Libraries the plugin is linked against, in addition to any user-supplied LDFLAGS. | ||
BASELDFLAGS := -lpsm2 -lnccl | ||
|
||
# Compile each C source into a position-independent object (-fPIC). | ||
%.o : %.c | ||
$(CC) $(INCLUDES) $(CFLAGS) $(BASECFLAGS) -c -fPIC -o $@ $^ | ||
|
||
# Link all objects into the shared object named by SONAME. | ||
$(SONAME) : $(LIBOBJS) | ||
$(LD) -shared $(BASELDFLAGS) $(LDFLAGS) -o $@ $^ | ||
|
||
all : $(SONAME) | ||
|
||
# Remove the build outputs; the leading '-' makes make ignore a failing rm. | ||
clean : | ||
-rm $(SONAME) $(LIBOBJS) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
# BEGIN_ICS_COPYRIGHT8 **************************************** | ||
# | ||
# Copyright (c) 2021, Cornelis Networks | ||
# | ||
# Redistribution and use in source and binary forms, with or without | ||
# modification, are permitted provided that the following conditions are met: | ||
# | ||
# * Redistributions of source code must retain the above copyright notice, | ||
# this list of conditions and the following disclaimer. | ||
# * Redistributions in binary form must reproduce the above copyright | ||
# notice, this list of conditions and the following disclaimer in the | ||
# documentation and/or other materials provided with the distribution. | ||
# * Neither the name of Cornelis Networks nor the names of its contributors | ||
# may be used to endorse or promote products derived from this software | ||
# without specific prior written permission. | ||
# | ||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE | ||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
# | ||
# END_ICS_COPYRIGHT8 **************************************** | ||
|
||
#[ICS VERSION STRING: unknown] | ||
|
||
README contents: | ||
- DEPENDENCIES | ||
- BUILDING | ||
- RUNNING | ||
- NCCL ranks, PSM2 endpoints, and hfi1 num_user_contexts | ||
|
||
DEPENDENCIES | ||
============ | ||
|
||
libpsm2 w/cuda support installed on GPU nodes | ||
hfi1-gpudirect | ||
NCCL 2.8.3 must be installed on GPU nodes | ||
NCCL development clone to build psm2-nccl plugin; available here - https://github.com/NVIDIA/nccl.git | ||
* Must also build NCCL development clone in order to generate headers needed to build PSM2-NCCL. | ||
|
||
BUILDING | ||
======== | ||
cd into psm2-nccl clone and do "make". | ||
|
||
Set 'BUILDDIR' make variable to control where libnccl-net.so goes. By default, BUILDDIR is '.'. | ||
|
||
See Makefile for other directory and build variables. | ||
|
||
RUNNING | ||
======= | ||
Add directory containing psm2-nccl libnccl-net.so to LD_LIBRARY_PATH and ensure LD_LIBRARY_PATH is exported to rank environments. | ||
|
||
Run your NCCL app using OpenMPI mpirun, e.g.: | ||
mpirun -np 2 --map-by ppr:1:node -host <host1>,<host2> -x PSM2_CUDA=1 -x PSM2_GPUDIRECT=1 -x PSM2_GDRCOPY=0 -x PSM2_MULTI_EP=1 -x LD_LIBRARY_PATH -x NCCL_NET_GDR_LEVEL=5 build/all_reduce_perf | ||
|
||
Environment variable notes: | ||
* PSM2_MULTI_EP=1 must be set in all ranks' environments. Not doing so may cause the job to fail on a psm2_ep_open() with the error 'PSM2_TOO_MANY_ENDPOINTS'. | ||
* PSM2_CUDA=1 and PSM2_GPUDIRECT=1 must both be set in all ranks' environments for PSM2-NCCL to use GPUDirect. | ||
* PSM2_RCVTHREAD=0 is not supported with PSM2-NCCL. | ||
* PSM2-NCCL uses NCCL's logging system for debug output. All PSM2-NCCL output is logged under the NCCL 'NET' logging subsystem. To get PSM2-NCCL debug output, add '-x NCCL_DEBUG=INFO -x NCCL_DEBUG_SUBSYS=NET -x PSM2_NCCL_LOG_LEVEL=2' to your mpirun line. See src/include/psm2_nccl_debug.h for PSM2-NCCL log levels. | ||
|
||
PSM2-NCCL only supports OpenMPI at present. You may use CUDA-aware or CUDA-naive OpenMPI, but according to the NCCL user guide (https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/mpi.html#inter-gpu-communication-with-cuda-aware-mpi), using NCCL and CUDA-aware MPI operations concurrently may cause deadlocks. | ||
|
||
PSM2 does not support more than one GPU per host process and so PSM2-NCCL similarly does not support more than one GPU per host process. | ||
|
||
NCCL ranks, PSM2 endpoints, and hfi1 num_user_contexts | ||
====================================================== | ||
PSM2-NCCL uses one PSM2 endpoint for each NCCL communicator object. Each NCCL rank requires one send and one receive NCCL communicator object to establish bidirectional communications with a remote NCCL rank. As NCCL typically requires an all-to-all communication pattern, the size of an NCCL job with PSM2-NCCL is limited to (number of PSM2 endpoints)/2. | ||
|
||
The number of PSM2 endpoints is limited by the number of hfi1 contexts. Typically, hfi1 will have as many contexts as there are physical CPU cores on the system. The number of hfi1 contexts can be increased up to a limit of 160 using the 'num_user_contexts' kernel module parameter. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
/* BEGIN_ICS_COPYRIGHT7 **************************************** | ||
Copyright (c) 2021, Cornelis Networks | ||
Redistribution and use in source and binary forms, with or without | ||
modification, are permitted provided that the following conditions are met: | ||
* Redistributions of source code must retain the above copyright notice, | ||
this list of conditions and the following disclaimer. | ||
* Redistributions in binary form must reproduce the above copyright | ||
notice, this list of conditions and the following disclaimer in the | ||
documentation and/or other materials provided with the distribution. | ||
* Neither the name of Cornelis Networks nor the names of its contributors | ||
may be used to endorse or promote products derived from this software | ||
without specific prior written permission. | ||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE | ||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
** END_ICS_COPYRIGHT7 ****************************************/ | ||
|
||
/* [ICS VERSION STRING: unknown] */ | ||
|
||
#include "hfi_sysclass.h" | ||
#include "psm2_nccl_debug.h" | ||
|
||
#include <stdio.h> | ||
#include <stdlib.h> | ||
|
||
#include <limits.h> | ||
#include <fcntl.h> | ||
#include <sys/stat.h> | ||
#include <sys/types.h> | ||
#include <string.h> | ||
|
||
ssize_t hfi_sysclass_rd(int dev, const char *attr, char *out, size_t len) | ||
{ | ||
int fd; | ||
char path[PATH_MAX]; | ||
|
||
int prct = snprintf(path, PATH_MAX, HFI_SYSCLASS_FMT"/%s", dev, attr); | ||
if (prct >= PATH_MAX) | ||
return -1; | ||
|
||
fd = open(path, O_RDONLY); | ||
if (fd < 0) | ||
return -2; | ||
|
||
ssize_t rdct = read(fd, out, len); | ||
PSM_DBG("dev=%d,attr=%s,len=%zu,rdct=%zd", dev, attr, len, rdct); | ||
if (rdct < 0 || rdct >= len) { | ||
// Read error or too much data, no room for null-terminator | ||
close(fd); | ||
return -3; | ||
} | ||
|
||
out[rdct] = '\0'; | ||
PSM_DBG("dev=%d,attr=%s,out=%s", dev, attr, out); | ||
close(fd); | ||
return rdct; | ||
} | ||
|
||
// hfi1_<dev>/uevent should be of the form NAME=<devname>
// An enum constant (rather than a const object) keeps the buffer below a
// fixed-size array instead of a variable-length array.
enum { UEVENT_MAX = 256 };

/**
 * Extract the device name from the "uevent" sysfs attribute of device @c dev.
 *
 * The attribute text must start with "NAME="; everything after that prefix is
 * copied into a freshly malloc()ed, null-terminated string.
 *
 * NOTE(review): the text after "NAME=" is copied verbatim. If the attribute
 * ends with a newline (or holds further lines) that is part of the returned
 * string -- confirm this is what callers expect.
 *
 * @param dev   device number.
 * @param name  out: set to the allocated name on success (the caller owns the
 *              allocation), set to NULL on failure.
 *
 * @retval  1 success.
 * @retval -1 the uevent attribute could not be read.
 * @retval -2 the attribute does not start with "NAME=".
 * @retval -3 out of memory.
 */
int hfi_sysclass_get_devname(int dev, char **name)
{
	static const char prefix[] = "NAME=";
	const size_t prefix_len = sizeof(prefix) - 1;
	char uevent[UEVENT_MAX];

	*name = NULL;

	if (hfi_sysclass_rd(dev, "uevent", uevent, sizeof(uevent)) < 0)
		return -1;

	// hfi_sysclass_rd() null-terminated uevent on success.
	size_t uevent_len = strlen(uevent);
	if (uevent_len < prefix_len || strncmp(uevent, prefix, prefix_len) != 0)
		return -2;

	// Allocate exactly the name plus its terminator and copy both in one go.
	size_t name_len = uevent_len - prefix_len;
	char *copy = malloc(name_len + 1);
	if (!copy)
		return -3;
	memcpy(copy, uevent + prefix_len, name_len + 1);

	*name = copy;
	return 1;
}
|
||
int hfi_sysclass_get_pciPath(int dev, char **path) | ||
{ | ||
char devpath[PATH_MAX]; | ||
struct stat sb = {0}; | ||
*path = NULL; | ||
|
||
int prct = snprintf(devpath, sizeof(devpath), HFI_SYSCLASS_FMT"/device", dev); | ||
if (prct >= PATH_MAX) | ||
return -1; | ||
|
||
// Test that devpath exists and is a symlink | ||
if (lstat(devpath, &sb) == -1) { | ||
return -2; | ||
} | ||
|
||
// No way to tell how long the resolved path will be, only safe option is to allocate PATH_MAX. | ||
*path = malloc(PATH_MAX); | ||
if (!*path) | ||
return -3; | ||
|
||
char *resolved = realpath(devpath, *path); | ||
if (!resolved) { | ||
free(*path); | ||
*path = NULL; | ||
return -4; | ||
} | ||
|
||
return 1; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
/* BEGIN_ICS_COPYRIGHT7 **************************************** | ||
Copyright (c) 2021, Cornelis Networks | ||
Redistribution and use in source and binary forms, with or without | ||
modification, are permitted provided that the following conditions are met: | ||
* Redistributions of source code must retain the above copyright notice, | ||
this list of conditions and the following disclaimer. | ||
* Redistributions in binary form must reproduce the above copyright | ||
notice, this list of conditions and the following disclaimer in the | ||
documentation and/or other materials provided with the distribution. | ||
* Neither the name of Cornelis Networks nor the names of its contributors | ||
may be used to endorse or promote products derived from this software | ||
without specific prior written permission. | ||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE | ||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
** END_ICS_COPYRIGHT7 ****************************************/ | ||
|
||
/* [ICS VERSION STRING: unknown] */ | ||
|
||
#ifndef _HFI_SYSCLASS_H_INCLUDED_ | ||
#define _HFI_SYSCLASS_H_INCLUDED_ 1 | ||
|
||
#include <unistd.h> | ||
|
||
// printf-style format of a device's sysfs directory; takes the device number (int). | ||
#define HFI_SYSCLASS_FMT "/sys/class/infiniband/hfi1_%d" | ||
|
||
/** | ||
* Read up to @c len bytes from the sysfs attribute HFI_SYSCLASS_FMT/<attr> of | ||
* device @c dev into @c out, then add a terminating '\0' to @c out. It is an | ||
* error if @c len is not big enough to fit the data from <attr> plus the | ||
* null-terminator. | ||
* | ||
* @return number of bytes read. | ||
* @retval >= 0 Success, 0 indicates EOF. @c out will be null-terminated either way. | ||
* @retval < 0 Failure. Contents of @c out are undefined. | ||
*/ | ||
extern ssize_t hfi_sysclass_rd(int dev, const char *attr, char *out, size_t len); | ||
|
||
/** | ||
* Get the name of device @c dev from its "uevent" sysfs attribute. | ||
* | ||
* @return 1 on success, < 0 on failure. On success, @c *name points to a | ||
* null-terminated string allocated with malloc(); the caller owns it. | ||
* Contents of @c *name are undefined on failure. | ||
*/ | ||
extern int hfi_sysclass_get_devname(int dev, char **name); | ||
|
||
/** | ||
* Get the resolved path of the "device" entry in the sysfs directory of @c dev. | ||
* | ||
* @return 1 on success, < 0 on failure. On success, @c *path points to a | ||
* null-terminated string allocated with malloc(); the caller owns it. | ||
* Contents of @c *path are undefined on failure. | ||
*/ | ||
extern int hfi_sysclass_get_pciPath(int dev, char **path); | ||
|
||
#endif /* _HFI_SYSCLASS_H_INCLUDED_ */ |
Oops, something went wrong.