Skip to content

Commit

Permalink
mpid/shm: Added support for moving copy buffer in MCDRAM
Browse files Browse the repository at this point in the history
This commit introduces runtime control over the placement of shared
memory objects in a heterogeneous memory system, specifically the copy
buffers used by CH3 for the rendezvous protocol. Runtime binding is
provided through an additional CVAR:
MPIR_CVAR_SHMEM_COPYBUF_BIND_TYPE.
By default this is set to "DDR", but it can be set to "MCDRAM" to move
the copy buffers into MCDRAM.
  • Loading branch information
Giuseppe Congiu committed Sep 26, 2018
1 parent c46f11d commit 1d6ca46
Showing 1 changed file with 43 additions and 8 deletions.
51 changes: 43 additions & 8 deletions src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt_shm.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,28 @@
#define DBG_LMT(x)
#endif

/*
=== BEGIN_MPI_T_CVAR_INFO_BLOCK ===
categories:
- name : SHMEM_RENDEZVOUS
description : >-
CVARs controlling shared memory object binding for Rendezvous protocol.
cvars:
- name : MPIR_CVAR_SHMEM_COPYBUF_BIND_TYPE
category : SHMEM_RENDEZVOUS
type : string
default : "DDR"
class : none
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_GROUP
description : >-
The memory type to which the shared memory copy buffer used by the rendezvous protocol is bound on a KNL system. Valid values are "DDR" (the default) and "MCDRAM".
=== END_MPI_T_CVAR_INFO_BLOCK ===
*/

/* Progress queue */

typedef struct lmt_shm_prog_element
Expand Down Expand Up @@ -179,6 +201,19 @@ int MPID_nem_lmt_shm_start_recv(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV s_coo
mpi_errno = MPID_nem_allocate_shm_region(&vc_ch->lmt_copy_buf, vc_ch->lmt_copy_buf_handle);
if (mpi_errno) MPIR_ERR_POP(mpi_errno);

#ifdef HAVE_HWLOC
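/* bind the shared memory copy buffer to MCDRAM (high-bandwidth memory) when
 * MPIR_CVAR_SHMEM_COPYBUF_BIND_TYPE is set to "MCDRAM" */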
if (MPIR_Process.mcdramset_is_valid &&
strcmp(MPIR_CVAR_SHMEM_COPYBUF_BIND_TYPE, "MCDRAM") == 0) {
hwloc_set_area_membind(MPIR_Process.topology, vc_ch->lmt_copy_buf,
sizeof(MPID_nem_copy_buf_t),
(MPIR_Process.bindset_is_valid) ?
MPIR_Process.mcdram_sibling : MPIR_Process.
mcdram_nodes, HWLOC_MEMBIND_BIND,
HWLOC_MEMBIND_STRICT | HWLOC_MEMBIND_BYNODESET);
}
#endif

vc_ch->lmt_copy_buf->sender_present.val = 0;
vc_ch->lmt_copy_buf->receiver_present.val = 0;

Expand All @@ -193,7 +228,7 @@ int MPID_nem_lmt_shm_start_recv(MPIDI_VC_t *vc, MPIR_Request *req, MPL_IOV s_coo
/* send CTS with handle for copy buffer */
mpi_errno = MPL_shm_hnd_get_serialized_by_ref((vc_ch->lmt_copy_buf_handle), &ser_lmt_copy_buf_handle);
if(mpi_errno != MPI_SUCCESS) { MPIR_ERR_POP(mpi_errno); }

MPID_nem_lmt_send_CTS(vc, req, ser_lmt_copy_buf_handle, (int)strlen(ser_lmt_copy_buf_handle) + 1);

queue_initially_empty = LMT_SHM_Q_EMPTY(vc_ch->lmt_queue) && vc_ch->lmt_active_lmt == NULL;
Expand Down Expand Up @@ -375,13 +410,13 @@ static int get_next_req(MPIDI_VC_t *vc)
/* remote side chooses next transfer */

OPA_read_barrier();

MPL_DBG_STMT(MPIDI_CH3_DBG_CHANNEL, VERBOSE, if (copy_buf->owner_info.val.remote_req_id == MPI_REQUEST_NULL)
MPL_DBG_MSG_D(MPIDI_CH3_DBG_CHANNEL, VERBOSE, "waiting for owner rank=%d", vc->pg_rank));

while (copy_buf->owner_info.val.remote_req_id == MPI_REQUEST_NULL) {
int p = 0;

if (p == LMT_POLLS_BEFORE_GIVING_UP)
goto fn_exit;
++p;
Expand Down Expand Up @@ -470,7 +505,7 @@ static int lmt_shm_send_progress(MPIDI_VC_t *vc, MPIR_Request *req, int *done)
waiting, otherwise wait for a bounded amount of time. */
while (copy_buf->len[buf_num].val != 0) {
int p = 0;

if (!copy_buf->receiver_present.val || p == LMT_POLLS_BEFORE_GIVING_UP) {
req->dev.segment_first = first;
vc_ch->lmt_buf_num = buf_num;
Expand Down Expand Up @@ -564,7 +599,7 @@ static int lmt_shm_recv_progress(MPIDI_VC_t *vc, MPIR_Request *req, int *done)
waiting, otherwise wait for a bounded amount of time. */
while ((len = copy_buf->len[buf_num].val) == 0) {
int p = 0;

if (!copy_buf->sender_present.val || p == LMT_POLLS_BEFORE_GIVING_UP) {
req->dev.segment_first = first;
vc_ch->lmt_buf_num = buf_num;
Expand Down Expand Up @@ -727,7 +762,7 @@ static inline int lmt_shm_progress_vc(MPIDI_VC_t *vc, int *done)
if (vc_ch->lmt_active_lmt == NULL)
{
/* couldn't find an appropriate request, try again later */

if (LMT_SHM_Q_EMPTY(vc_ch->lmt_queue))
*done = TRUE; /* There's nothing in the queue. VC
must have terminated */
Expand Down Expand Up @@ -956,7 +991,7 @@ static int MPID_nem_delete_shm_region(MPID_nem_copy_buf_t **buf_p, MPL_shm_hnd_t
mpi_errno = MPL_shm_seg_remove(*handle_p);
if (mpi_errno != MPI_SUCCESS) { MPIR_ERR_POP(mpi_errno); }

mpi_errno = MPID_nem_detach_shm_region(buf_p, *handle_p);
mpi_errno = MPID_nem_detach_shm_region(buf_p, *handle_p);
if (mpi_errno != MPI_SUCCESS) { MPIR_ERR_POP(mpi_errno); }

mpi_errno = MPL_shm_hnd_finalize(handle_p);
Expand Down

0 comments on commit 1d6ca46

Please sign in to comment.