Skip to content

Commit

Permalink
Updates the PSM2 library to match release 11.2.185
Browse files Browse the repository at this point in the history
Corresponds to IFS release OPENIB_INSTALL_LINUX_opa-10_10_10_10_3_0_12

Changes:
* Add support for clang.
* Add support for gcc 10.
* Change default value of MQ_HFI_THRESH_EGR_SDMA_SQ_XEON to 16384.
* am_cuda_memhandle_cache: add cache clear counter.
* Other minor fixes to am_cuda_memhandle_cache code.
* Update contributors.

Signed-off-by: Brendan Cunningham <[email protected]>
  • Loading branch information
Brendan Cunningham committed Jul 21, 2020
1 parent be11661 commit 7a33bed
Show file tree
Hide file tree
Showing 17 changed files with 202 additions and 145 deletions.
2 changes: 1 addition & 1 deletion COMMIT
Original file line number Diff line number Diff line change
@@ -1 +1 @@
61553edd6b9fefce4a246b4701abc213e7c36b2b
30c52a0fd155774e18cc06328a1ba83c2a6a8104
1 change: 1 addition & 0 deletions CONTRIBUTORS
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ Bernhard M. Wiedemann (bmwidemann on github.com)
Dmitry (dmitrygx on github.com)
Florian Weimer (fweimer on github.com)
Jonas Hahnfeld (hahnjo on github.com)
Tom Stellard (tstellar on github.com)
7 changes: 0 additions & 7 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -220,13 +220,6 @@ Note: It is also possible to use the rpm command to install RPMs, but it is recommended
that one use yum/dnf instead, as the rpm tool has issues with name changes and Obsoletes tags.
yum or dnf should be better able to resolve dependency issues.

TESTING
=======

Please see the psm_test subdirectory, starting with the file psm_test/README, for
code and instructions on testing the psm2 library. Additionally, the
directory psm_test/samples contains sample code to test.

RELATED SOFTWARE TO PSM2
========================

Expand Down
127 changes: 60 additions & 67 deletions buildflags.mak
Original file line number Diff line number Diff line change
Expand Up @@ -60,19 +60,11 @@ endif
export os ?= $(shell uname -s | tr '[A-Z]' '[a-z]')
export arch := $(shell uname -m | sed -e 's,\(i[456]86\|athlon$$\),i386,')

ifeq (${CCARCH},gcc)
export CC := gcc
ifeq (${CCARCH},$(filter ${CCARCH},gcc gcc4 icc clang))
export CC := ${CCARCH}
else
ifeq (${CCARCH},gcc4)
export CC := gcc4
else
ifeq (${CCARCH},icc)
export CC := icc
else
anerr := $(error Unknown C compiler arch: ${CCARCH})
endif # ICC
endif # gcc4
endif # gcc
anerr := $(error Unknown C compiler arch: ${CCARCH})
endif

ifeq (${FCARCH},gfortran)
export FC := gfortran
Expand Down Expand Up @@ -108,48 +100,48 @@ BASECFLAGS +=-Wall $(WERROR)
# Test whether the compiler supports 32B (AVX2) / 64B (AVX512F) move instructions.
#
ifeq (${CC},icc)
ifeq ($(PSM_DISABLE_AVX2),)
MAVX2=-xATOM_SSE4.2 -DPSM_AVX512
else
MAVX2=-march=core-avx-i
endif
ifeq ($(PSM_DISABLE_AVX2),)
MAVX2=-xATOM_SSE4.2 -DPSM_AVX512
else
MAVX2=-march=core-avx-i
endif
else
ifeq ($(PSM_DISABLE_AVX2),)
MAVX2=-mavx2
else
MAVX2=-mavx
endif
ifeq ($(PSM_DISABLE_AVX2),)
MAVX2=-mavx2
else
MAVX2=-mavx
endif
endif

ifneq (icc,${CC})
ifeq ($(PSM_DISABLE_AVX2),)
RET := $(shell echo "int main() {}" | ${CC} ${MAVX2} -E -dM -xc - 2>&1 | grep -q AVX2 ; echo $$?)
else
RET := $(shell echo "int main() {}" | ${CC} ${MAVX2} -E -dM -xc - 2>&1 | grep -q AVX ; echo $$?)
$(warning ***NOTE TO USER**** Disabling AVX2 will harm performance)
endif

ifeq (0,${RET})
BASECFLAGS += ${MAVX2}
else
$(error Compiler does not support ${MAVX2} )
endif
ifeq ($(PSM_DISABLE_AVX2),)
RET := $(shell echo "int main() {}" | ${CC} ${MAVX2} -E -dM -xc - 2>&1 | grep -q AVX2 ; echo $$?)
else
RET := $(shell echo "int main() {}" | ${CC} ${MAVX2} -E -dM -xc - 2>&1 | grep -q AVX ; echo $$?)
$(warning ***NOTE TO USER**** Disabling AVX2 will harm performance)
endif

ifeq (0,${RET})
BASECFLAGS += ${MAVX2}
else
$(error Compiler does not support ${MAVX2} )
endif
else
BASECFLAGS += ${MAVX2}
BASECFLAGS += ${MAVX2}
endif

# This support is dynamic at runtime, so it is OK to enable as long as the compiler can generate
# the code.
ifneq (,${PSM_AVX512})
ifneq (icc,${CC})
RET := $(shell echo "int main() {}" | ${CC} -mavx512f -E -dM -xc - 2>&1 | grep -q AVX512 ; echo $$?)
ifeq (0,${RET})
BASECFLAGS += -mavx512f
else
$(error Compiler does not support AVX512 )
endif
BASECFLAGS += -DPSM_AVX512
endif
ifneq (icc,${CC})
RET := $(shell echo "int main() {}" | ${CC} -mavx512f -E -dM -xc - 2>&1 | grep -q AVX512 ; echo $$?)
ifeq (0,${RET})
BASECFLAGS += -mavx512f
else
$(error Compiler does not support AVX512 )
endif
BASECFLAGS += -DPSM_AVX512
endif
endif

#
Expand All @@ -158,42 +150,42 @@ endif
BASECFLAGS += -D_DEFAULT_SOURCE -D_SVID_SOURCE -D_BSD_SOURCE

ifneq (,${HFI_BRAKE_DEBUG})
BASECFLAGS += -DHFI_BRAKE_DEBUG
BASECFLAGS += -DHFI_BRAKE_DEBUG
endif
ifneq (,${PSM_FI})
BASECFLAGS += -DPSM_FI
BASECFLAGS += -DPSM_FI
endif
ifneq (,${PSM_DEBUG})
BASECFLAGS += -O -g3 -DPSM_DEBUG -D_HFI_DEBUGGING -funit-at-a-time -Wp,-D_FORTIFY_SOURCE=2
BASECFLAGS += -O -g3 -DPSM_DEBUG -D_HFI_DEBUGGING -funit-at-a-time -Wp,-D_FORTIFY_SOURCE=2
else
BASECFLAGS += -O3 -g3
BASECFLAGS += -O3 -g3
endif
ifneq (,${PSM_COVERAGE}) # This check must come after PSM_DEBUG to override optimization setting
BASECFLAGS += -O -fprofile-arcs -ftest-coverage
LDFLAGS += -fprofile-arcs
BASECFLAGS += -O -fprofile-arcs -ftest-coverage
LDFLAGS += -fprofile-arcs
endif
ifneq (,${PSM_LOG})
BASECFLAGS += -DPSM_LOG
BASECFLAGS += -DPSM_LOG
ifneq (,${PSM_LOG_FAST_IO})
BASECFLAGS += -DPSM_LOG_FAST_IO
PSM2_ADDITIONAL_GLOBALS += psmi_log_fini;psmi_log_message;
BASECFLAGS += -DPSM_LOG_FAST_IO
PSM2_ADDITIONAL_GLOBALS += psmi_log_fini;psmi_log_message;
endif
endif
ifneq (,${PSM_PERF})
BASECFLAGS += -DRDPMC_PERF_FRAMEWORK
BASECFLAGS += -DRDPMC_PERF_FRAMEWORK
endif
ifneq (,${PSM_HEAP_DEBUG})
BASECFLAGS += -DPSM_HEAP_DEBUG
PSM2_ADDITIONAL_GLOBALS += _psmi_heapdebug_val_heapallocs;
BASECFLAGS += -DPSM_HEAP_DEBUG
PSM2_ADDITIONAL_GLOBALS += _psmi_heapdebug_val_heapallocs;
endif
ifneq (,${PSM_PROFILE})
BASECFLAGS += -DPSM_PROFILE
BASECFLAGS += -DPSM_PROFILE
endif
BASECFLAGS += -DNVIDIA_GPU_DIRECT
ifneq (,${PSM_CUDA})
BASECFLAGS += -DPSM_CUDA
CUDA_HOME ?= /usr/local/cuda
INCLUDES += -I$(CUDA_HOME)/include
BASECFLAGS += -DPSM_CUDA
CUDA_HOME ?= /usr/local/cuda
INCLUDES += -I$(CUDA_HOME)/include
endif

BASECFLAGS += -fpic -fPIC -D_GNU_SOURCE
Expand All @@ -203,15 +195,16 @@ ASFLAGS += -g3 -fpic
BASECFLAGS += ${OPA_CFLAGS}

ifeq (${CCARCH},icc)
BASECFLAGS += -fpic -fPIC -D_GNU_SOURCE -DPACK_STRUCT_STL=packed,
LDFLAGS += -static-intel
BASECFLAGS += -fpic -fPIC -D_GNU_SOURCE -DPACK_STRUCT_STL=packed,
LDFLAGS += -static-intel
else
ifeq (${CCARCH},gcc)
BASECFLAGS += -funwind-tables -Wno-strict-aliasing -Wformat-security
LDFLAGS += -Wl,--build-id
ifeq (${CCARCH},$(filter ${CCARCH},gcc clang))
BASECFLAGS += -funwind-tables -Wno-strict-aliasing -Wformat-security
else
ifneq (${CCARCH},gcc4)
$(error Unknown compiler arch "${CCARCH}")
endif # gcc4
ifneq (${CCARCH},gcc4)
$(error Unknown compiler arch "${CCARCH}")
endif # gcc4
endif # gcc
endif # icc

Expand Down
18 changes: 5 additions & 13 deletions compat/buildflags.mak
Original file line number Diff line number Diff line change
Expand Up @@ -57,19 +57,11 @@ export os ?= $(shell uname -s | tr '[A-Z]' '[a-z]')
export arch := $(shell uname -m | sed -e 's,\(i[456]86\|athlon$$\),i386,')
export CCARCH ?= gcc

ifeq (${CCARCH},gcc)
export CC := gcc
ifeq (${CCARCH},$(filter ${CCARCH},gcc gcc4 icc clang))
export CC := ${CCARCH}
else
ifeq (${CCARCH},gcc4)
export CC := gcc4
else
ifeq (${CCARCH},icc)
export CC := icc
else
anerr := $(error Unknown C compiler arch: ${CCARCH})
endif # ICC
endif # gcc4
endif # gcc
anerr := $(error Unknown C compiler arch: ${CCARCH})
endif

BASECFLAGS += $(BASE_FLAGS)
LDFLAGS += $(BASE_FLAGS)
Expand All @@ -90,7 +82,7 @@ ifeq (${CCARCH},icc)
BASECFLAGS += -O3 -g3
LDFLAGS += -static-intel
else
ifeq (${CCARCH},gcc)
ifeq (${CCARCH},$(filter ${CCARCH},gcc clang))
BASECFLAGS += -Wno-strict-aliasing
else
ifneq (${CCARCH},gcc4)
Expand Down
2 changes: 1 addition & 1 deletion libpsm2.spec.in
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ Obsoletes: hfi1-psm < 1.0.0

%if "@RPM_NAME_BASEEXT@"
%package -n @RPM_NAME@@RPM_NAME_BASEEXT@
%endif
Summary: Intel PSM2 Libraries
%endif
Provides: @RPM_NAME@ = %{version}-%{release}
Provides: @RPM_NAME@%{_isa} = %{version}-%{release}
%if 0%{?suse_version}
Expand Down
38 changes: 38 additions & 0 deletions psm.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ uint64_t *shared_affinity_ptr;
char *sem_affinity_shm_rw_name;
char *affinity_shm_name;

uint32_t psmi_cpu_model;

#ifdef PSM_CUDA
int is_cuda_enabled;
int is_gdr_copy_enabled;
Expand All @@ -99,6 +101,42 @@ int is_cuda_primary_context_retain = 0;
uint32_t cuda_thresh_rndv;
uint32_t gdr_copy_threshold_send;
uint32_t gdr_copy_threshold_recv;

void *psmi_cuda_lib;
CUresult (*psmi_cuInit)(unsigned int Flags );
CUresult (*psmi_cuCtxDetach)(CUcontext c);
CUresult (*psmi_cuCtxGetCurrent)(CUcontext *c);
CUresult (*psmi_cuCtxSetCurrent)(CUcontext c);
CUresult (*psmi_cuPointerGetAttribute)(void *data, CUpointer_attribute pa, CUdeviceptr p);
CUresult (*psmi_cuPointerSetAttribute)(void *data, CUpointer_attribute pa, CUdeviceptr p);
CUresult (*psmi_cuDeviceCanAccessPeer)(int *canAccessPeer, CUdevice dev, CUdevice peerDev);
CUresult (*psmi_cuDeviceGet)(CUdevice* device, int ordinal);
CUresult (*psmi_cuDeviceGetAttribute)(int* pi, CUdevice_attribute attrib, CUdevice dev);
CUresult (*psmi_cuDriverGetVersion)(int* driverVersion);
CUresult (*psmi_cuDeviceGetCount)(int* count);
CUresult (*psmi_cuStreamCreate)(CUstream* phStream, unsigned int Flags);
CUresult (*psmi_cuStreamDestroy)(CUstream phStream);
CUresult (*psmi_cuEventCreate)(CUevent* phEvent, unsigned int Flags);
CUresult (*psmi_cuEventDestroy)(CUevent hEvent);
CUresult (*psmi_cuEventQuery)(CUevent hEvent);
CUresult (*psmi_cuEventRecord)(CUevent hEvent, CUstream hStream);
CUresult (*psmi_cuEventSynchronize)(CUevent hEvent);
CUresult (*psmi_cuMemHostAlloc)(void** pp, size_t bytesize, unsigned int Flags);
CUresult (*psmi_cuMemFreeHost)(void* p);
CUresult (*psmi_cuMemcpy)(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount);
CUresult (*psmi_cuMemcpyDtoD)(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
CUresult (*psmi_cuMemcpyDtoH)(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount);
CUresult (*psmi_cuMemcpyHtoD)(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount);
CUresult (*psmi_cuMemcpyDtoHAsync)(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
CUresult (*psmi_cuMemcpyHtoDAsync)(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount, CUstream hStream);
CUresult (*psmi_cuIpcGetMemHandle)(CUipcMemHandle* pHandle, CUdeviceptr dptr);
CUresult (*psmi_cuIpcOpenMemHandle)(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned int Flags);
CUresult (*psmi_cuIpcCloseMemHandle)(CUdeviceptr dptr);
CUresult (*psmi_cuMemGetAddressRange)(CUdeviceptr* pbase, size_t* psize, CUdeviceptr dptr);
CUresult (*psmi_cuDevicePrimaryCtxGetState)(CUdevice dev, unsigned int* flags, int* active);
CUresult (*psmi_cuDevicePrimaryCtxRetain)(CUcontext* pctx, CUdevice dev);
CUresult (*psmi_cuCtxGetDevice)(CUdevice* device);
CUresult (*psmi_cuDevicePrimaryCtxRelease)(CUdevice device);
#endif

/*
Expand Down
2 changes: 1 addition & 1 deletion psm_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@
#define MQ_HFI_THRESH_TINY 8
#define MQ_HFI_THRESH_EGR_SDMA_XEON 34000 /* Eager Xeon blocking */
#define MQ_HFI_THRESH_EGR_SDMA_PHI2 200000 /* Eager Phi2 blocking */
#define MQ_HFI_THRESH_EGR_SDMA_SQ_XEON 16000 /* Eager Xeon non-blocking */
#define MQ_HFI_THRESH_EGR_SDMA_SQ_XEON 16384 /* Eager Xeon non-blocking */
#define MQ_HFI_THRESH_EGR_SDMA_SQ_PHI2 65536 /* Eager Phi2 non-blocking */

#define MQ_HFI_THRESH_RNDV_PHI2 200000
Expand Down
2 changes: 1 addition & 1 deletion psm_error.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
#define PSMI_EP_NORETURN ((psm2_ep_t) -2)
#define PSMI_EP_LOGEVENT ((psm2_ep_t) -3)

psm2_ep_errhandler_t psmi_errhandler_global;
extern psm2_ep_errhandler_t psmi_errhandler_global;

psm2_error_t MOCKABLE(psmi_handle_error)(psm2_ep_t ep, psm2_error_t error,
const char *buf, ...)
Expand Down
5 changes: 1 addition & 4 deletions psm_hal_gen1/psm_gdrcpy.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,6 @@

static int gdr_fd;

int is_gdr_copy_enabled;


int get_gdr_fd(){
return gdr_fd;
}
Expand Down Expand Up @@ -175,7 +172,7 @@ gdr_convert_gpu_to_host_addr(int gdr_fd, unsigned long buf,
((buf + size - 1) & GPU_PAGE_MASK) -
pageaddr);

_HFI_VDBG("buf=%p size=%zu pageaddr=%p pagelen=%u flags=0x%x proto=%p\n",
_HFI_VDBG("(gpudirect) buf=%p size=%zu pageaddr=%p pagelen=%u flags=0x%x proto=%p\n",
(void *)buf, size, (void *)pageaddr, pagelen, flags, proto);

query_params.query_params_in.gpu_buf_addr = pageaddr;
Expand Down
2 changes: 1 addition & 1 deletion psm_hal_gen1/psm_hal_inline_i.h
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ static PSMI_HAL_INLINE int hfp_gen1_context_open(int unit,
unsigned retryCnt)
{
int fd = -1;
psm2_error_t err = PSM_HAL_ERROR_OK;
psm2_error_t err = PSM2_OK;
hfp_gen1_pc_private *pc_private = psmi_malloc(ep, UNDEFINED, sizeof(hfp_gen1_pc_private));

if_pf (!pc_private) {
Expand Down
Loading

0 comments on commit 7a33bed

Please sign in to comment.