diff --git a/CHANGES b/CHANGES index c7480879f2c..128e6bd83b0 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,17 @@ +=============================================================================== + Changes in 3.4.1 +=============================================================================== + +# Fix bug in GPU memory hooks that caused failures with NCCL + +# Fix bug in GPU-aware nonblocking op collectives + +# Hide symbols from embedded json-c module + +# Remove anonymous struct/union usage in MPL + +# Disable Java support in embedded UCX module + =============================================================================== Changes in 3.4 =============================================================================== diff --git a/configure.ac b/configure.ac index e2ffe9d522f..dd7b4d7c642 100644 --- a/configure.ac +++ b/configure.ac @@ -1191,7 +1191,7 @@ AC_SUBST([jsonlib]) PAC_PUSH_ALL_FLAGS() PAC_RESET_ALL_FLAGS() -PAC_CONFIG_SUBDIR_ARGS([modules/json-c],[--disable-werror],[],[AC_MSG_ERROR(json-c configure failed)]) +PAC_CONFIG_SUBDIR_ARGS([modules/json-c],[--enable-embedded --disable-werror],[],[AC_MSG_ERROR(json-c configure failed)]) PAC_POP_ALL_FLAGS() jsonsrcdir="${main_top_builddir}/modules/json-c" jsonlib="${main_top_builddir}/modules/json-c/libjson-c.la" diff --git a/maint/version.m4 b/maint/version.m4 index aa771df22ac..e3bc8d9e7bf 100644 --- a/maint/version.m4 +++ b/maint/version.m4 @@ -14,7 +14,7 @@ # changing this by playing with diversions, but then we would probably be # playing with autotools-fire. -m4_define([MPICH_VERSION_m4],[3.4])dnl +m4_define([MPICH_VERSION_m4],[3.4.1])dnl m4_define([MPICH_RELEASE_DATE_m4],[unreleased development copy])dnl # For libtool ABI versioning rules see: @@ -35,6 +35,6 @@ m4_define([MPICH_RELEASE_DATE_m4],[unreleased development copy])dnl # libmpi so version only includes functionality defined in the MPI # standard, and does not include MPIX_ functions and C++ bindings. 
-m4_define([libmpi_so_version_m4],[13:9:1])dnl +m4_define([libmpi_so_version_m4],[13:10:1])dnl [#] end of __file__ diff --git a/modules/json-c b/modules/json-c index 366f1c6c0ea..c496a735ec7 160000 --- a/modules/json-c +++ b/modules/json-c @@ -1 +1 @@ -Subproject commit 366f1c6c0ea2ca2f1077c1296f5cb744336fac38 +Subproject commit c496a735ec7312a22c1484ac6950f12fc8712b2f diff --git a/src/include/mpir_gpu.h b/src/include/mpir_gpu.h index de790a8632f..a3c06d37788 100644 --- a/src/include/mpir_gpu.h +++ b/src/include/mpir_gpu.h @@ -33,6 +33,14 @@ extern int MPIR_CVAR_ENABLE_GPU; +#undef ENABLE_GPU + +#ifdef MPL_HAVE_GPU +#define ENABLE_GPU MPIR_CVAR_ENABLE_GPU +#else +#define ENABLE_GPU FALSE +#endif + MPL_STATIC_INLINE_PREFIX int MPIR_GPU_query_pointer_attr(const void *ptr, MPL_pointer_attr_t * attr) { int mpi_errno = MPI_SUCCESS, mpl_err = MPL_SUCCESS; @@ -40,7 +48,7 @@ MPL_STATIC_INLINE_PREFIX int MPIR_GPU_query_pointer_attr(const void *ptr, MPL_po /* Skip query if GPU support is disabled by CVAR. Because we assume * no GPU buffer is used. If the user disables GPU at configure time * (e.g., --without-cuda), then MPL fallback will handle the query. 
*/ - if (MPIR_CVAR_ENABLE_GPU) { + if (ENABLE_GPU) { mpl_err = MPL_gpu_query_pointer_attr(ptr, attr); MPIR_ERR_CHKANDJUMP(mpl_err != MPL_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**gpu_query_ptr"); } else { @@ -56,7 +64,7 @@ MPL_STATIC_INLINE_PREFIX int MPIR_GPU_query_pointer_attr(const void *ptr, MPL_po MPL_STATIC_INLINE_PREFIX bool MPIR_GPU_query_pointer_is_dev(const void *ptr) { - if (MPIR_CVAR_ENABLE_GPU && ptr != NULL) { + if (ENABLE_GPU && ptr != NULL) { MPL_pointer_attr_t attr; MPL_gpu_query_pointer_attr(ptr, &attr); diff --git a/src/mpi/coll/src/coll_impl.c b/src/mpi/coll/src/coll_impl.c index 0af50c61749..ffa6cc1d93d 100644 --- a/src/mpi/coll/src/coll_impl.c +++ b/src/mpi/coll/src/coll_impl.c @@ -218,7 +218,7 @@ void MPIR_Coll_host_buffer_alloc(const void *sendbuf, const void *recvbuf, MPI_A MPI_Aint extent = 0; if (sendbuf != MPI_IN_PLACE) { - MPL_gpu_query_pointer_attr(sendbuf, &attr); + MPIR_GPU_query_pointer_attr(sendbuf, &attr); if (attr.type == MPL_GPU_POINTER_DEV) { MPI_Aint true_extent; MPI_Aint true_lb; @@ -233,7 +233,7 @@ void MPIR_Coll_host_buffer_alloc(const void *sendbuf, const void *recvbuf, MPI_A } } - MPL_gpu_query_pointer_attr(recvbuf, &attr); + MPIR_GPU_query_pointer_attr(recvbuf, &attr); if (attr.type == MPL_GPU_POINTER_DEV) { if (!extent) { MPI_Aint true_extent; diff --git a/src/mpi/datatype/typerep/src/typerep_yaksa_pack.c b/src/mpi/datatype/typerep/src/typerep_yaksa_pack.c index 068ade08f70..b7c241485bb 100644 --- a/src/mpi/datatype/typerep/src/typerep_yaksa_pack.c +++ b/src/mpi/datatype/typerep/src/typerep_yaksa_pack.c @@ -20,8 +20,8 @@ int MPIR_Typerep_copy(void *outbuf, const void *inbuf, MPI_Aint num_bytes) } MPL_pointer_attr_t inattr, outattr; - MPL_gpu_query_pointer_attr(inbuf, &inattr); - MPL_gpu_query_pointer_attr(outbuf, &outattr); + MPIR_GPU_query_pointer_attr(inbuf, &inattr); + MPIR_GPU_query_pointer_attr(outbuf, &outattr); if ((inattr.type == MPL_GPU_POINTER_UNREGISTERED_HOST || inattr.type == MPL_GPU_POINTER_REGISTERED_HOST) && 
@@ -63,8 +63,8 @@ static inline bool fastpath_memcpy(const void *inbuf, void *outbuf, MPI_Datatype * true_lb) into the MPIR_Datatype structure */ if (HANDLE_IS_BUILTIN(type)) { MPL_pointer_attr_t inattr, outattr; - MPL_gpu_query_pointer_attr(inbuf, &inattr); - MPL_gpu_query_pointer_attr(outbuf, &outattr); + MPIR_GPU_query_pointer_attr(inbuf, &inattr); + MPIR_GPU_query_pointer_attr(outbuf, &outattr); if ((inattr.type == MPL_GPU_POINTER_UNREGISTERED_HOST || inattr.type == MPL_GPU_POINTER_REGISTERED_HOST) && @@ -86,11 +86,11 @@ static inline bool fastpath_memcpy(const void *inbuf, void *outbuf, MPI_Datatype MPL_pointer_attr_t inattr, outattr; if (dir == MEMCPY_DIR__PACK) { - MPL_gpu_query_pointer_attr((const char *) inbuf + dtp->true_lb + offset, &inattr); - MPL_gpu_query_pointer_attr(outbuf, &outattr); + MPIR_GPU_query_pointer_attr((const char *) inbuf + dtp->true_lb + offset, &inattr); + MPIR_GPU_query_pointer_attr(outbuf, &outattr); } else { - MPL_gpu_query_pointer_attr(inbuf, &inattr); - MPL_gpu_query_pointer_attr((char *) outbuf + dtp->true_lb + offset, &outattr); + MPIR_GPU_query_pointer_attr(inbuf, &inattr); + MPIR_GPU_query_pointer_attr((char *) outbuf + dtp->true_lb + offset, &outattr); } if ((inattr.type == MPL_GPU_POINTER_UNREGISTERED_HOST || diff --git a/src/mpi/misc/utils.c b/src/mpi/misc/utils.c index 20059b35a0e..d64f1f8fedd 100644 --- a/src/mpi/misc/utils.c +++ b/src/mpi/misc/utils.c @@ -66,8 +66,8 @@ int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtyp intptr_t sfirst; intptr_t rfirst; - MPL_gpu_query_pointer_attr(sendbuf, &send_attr); - MPL_gpu_query_pointer_attr(recvbuf, &recv_attr); + MPIR_GPU_query_pointer_attr(sendbuf, &send_attr); + MPIR_GPU_query_pointer_attr(recvbuf, &recv_attr); if (send_attr.type == MPL_GPU_POINTER_DEV && recv_attr.type == MPL_GPU_POINTER_DEV) { MPL_gpu_malloc((void **) &buf, COPY_BUFFER_SZ, recv_attr.device); diff --git a/src/mpid/ch4/src/ch4_impl.h b/src/mpid/ch4/src/ch4_impl.h index 
83aee8a4c97..df7060e2b74 100644 --- a/src/mpid/ch4/src/ch4_impl.h +++ b/src/mpid/ch4/src/ch4_impl.h @@ -1025,7 +1025,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDIG_compute_acc_op(void *source_buf, int source_ void *in_targetbuf = target_buf; void *host_targetbuf = NULL; MPL_pointer_attr_t attr; - MPL_gpu_query_pointer_attr(target_buf, &attr); + MPIR_GPU_query_pointer_attr(target_buf, &attr); /* FIXME: use typerep/yaksa GPU-aware accumulate when available */ if (attr.type == MPL_GPU_POINTER_DEV) { MPI_Aint extent, true_extent; diff --git a/src/mpl/src/gpu/mpl_gpu_cuda.c b/src/mpl/src/gpu/mpl_gpu_cuda.c index 97328fa07b5..62b84ff0a59 100644 --- a/src/mpl/src/gpu/mpl_gpu_cuda.c +++ b/src/mpl/src/gpu/mpl_gpu_cuda.c @@ -284,9 +284,12 @@ int MPL_gpu_get_buffer_bounds(const void *ptr, void **pbase, uintptr_t * len) static void gpu_free_hooks_cb(void *dptr) { gpu_free_hook_s *current = free_hook_chain; - while (current) { - current->free_hook(dptr); - current = current->next; + if (dptr != NULL) { + /* we call gpu hook only when dptr != NULL */ + while (current) { + current->free_hook(dptr); + current = current->next; + } } return; }