Skip to content

Commit

Permalink
ch3/win: Add support for shared memory window binding to MCDRAM
Browse files Browse the repository at this point in the history
This commit adds support for moving shared memory allocated with either
`MPI_Win_allocate` or `MPI_Win_allocate_shared` into MCDRAM. Two
mechanisms are provided to achieve MCDRAM placement:

 - User hints passed to the implementation through the info object
 - CVARs passed through the environment, which take precedence over the info hints
  • Loading branch information
Giuseppe Congiu committed Aug 15, 2018
1 parent a2ce4b3 commit d0a4ae1
Showing 1 changed file with 147 additions and 0 deletions.
147 changes: 147 additions & 0 deletions src/mpid/ch3/src/ch3u_win_fns.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,61 @@
#include "mpir_info.h"
#include "mpidrma.h"

/*
=== BEGIN_MPI_T_CVAR_INFO_BLOCK ===
categories:
- name : CH3_SHMEM_WIN
description : cvars that control shared memory window binding
cvars:
- name : MPIR_CVAR_CH3_SHMEM_WIN_BIND_TYPE
category : CH3_SHMEM_WIN
type : string
default : "DEFAULT"
class : none
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_GROUP
description : >-
The memory type to be used for allocating memory in a KNL system.
- name : MPIR_CVAR_CH3_SHMEM_WIN_BIND_POLICY
category : CH3_SHMEM_WIN
type : string
default : "DEFAULT"
class : none
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_GROUP
description : >-
The memory policy to be used for allocating memory in a KNL system.
- name : MPIR_CVAR_CH3_SHMEM_WIN_BIND_RULE
category : CH3_SHMEM_WIN
type : string
default : "NONE"
class : none
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_GROUP
description : >-
The memory policy rule to be used if MCDRAM allocation fails.
- name : MPIR_CVAR_CH3_SHMEM_WIN_BIND_PROC
category : CH3_SHMEM_WIN
type : string
default : "NOCPUBIND"
class : none
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_GROUP
description : >-
The memory migration policy to be used if process binding changes.
=== END_MPI_T_CVAR_INFO_BLOCK ===
*/

extern MPIR_T_pvar_timer_t PVAR_TIMER_rma_wincreate_allgather ATTRIBUTE((unused));

/* Forward declaration: applies the memory binding selected through the
 * MPIR_CVAR_CH3_SHMEM_WIN_BIND_* cvars or the "shm_win_bind_*" info hints to
 * the len bytes of window memory whose address is stored at baseptr. */
static void win_set_mempolicy(MPIR_Info * info, void *baseptr, size_t len);

#undef FUNCNAME
#define FUNCNAME MPIDI_Win_fns_init
#undef FCNAME
Expand Down Expand Up @@ -224,6 +277,7 @@ int MPIDI_CH3U_Win_allocate(MPI_Aint size, int disp_unit, MPIR_Info * info,
MPIDI_CH3U_Win_fns.allocate_shm(size, disp_unit, info, comm_ptr, baseptr, win_ptr);
if (mpi_errno != MPI_SUCCESS)
MPIR_ERR_POP(mpi_errno);
win_set_mempolicy(info, baseptr, size);
goto fn_exit;
}
}
Expand Down Expand Up @@ -544,3 +598,96 @@ int MPID_Win_get_info(MPIR_Win * win, MPIR_Info ** info_used)
fn_fail:
goto fn_exit;
}

#undef FUNCNAME
#define FUNCNAME win_set_mempolicy
#undef FCNAME
#define FCNAME MPL_QUOTE(FUNCNAME)
#ifdef HAVE_HWLOC
/* Translate a bind-policy name into the hwloc policy; "DEFAULT" and any
 * unrecognized name both map to HWLOC_MEMBIND_DEFAULT. */
static hwloc_membind_policy_t win_membind_policy_from_name(const char *name)
{
    if (strcmp(name, "FIRSTTOUCH") == 0)
        return HWLOC_MEMBIND_FIRSTTOUCH;
    if (strcmp(name, "BIND") == 0)
        return HWLOC_MEMBIND_BIND;
    if (strcmp(name, "INTERLEAVE") == 0)
        return HWLOC_MEMBIND_INTERLEAVE;
    return HWLOC_MEMBIND_DEFAULT;
}

/* Translate a process-binding name into the hwloc membind flag; "NOCPUBIND"
 * and any unrecognized name both map to HWLOC_MEMBIND_NOCPUBIND. */
static int win_membind_proc_flag_from_name(const char *name)
{
    if (strcmp(name, "PROCESS") == 0)
        return HWLOC_MEMBIND_PROCESS;
    if (strcmp(name, "THREAD") == 0)
        return HWLOC_MEMBIND_THREAD;
    if (strcmp(name, "MIGRATE") == 0)
        return HWLOC_MEMBIND_MIGRATE;
    return HWLOC_MEMBIND_NOCPUBIND;
}
#endif

/*
 * Apply a memory binding to an already allocated shared memory window.
 *
 * The binding is chosen from two sources:
 *   - If MPIR_CVAR_CH3_SHMEM_WIN_BIND_TYPE is "DEFAULT" and an info object is
 *     given, the "shm_win_bind_type", "shm_win_bind_policy",
 *     "shm_win_bind_rule" and "shm_win_bind_proc" info keys are consulted.
 *   - Otherwise, if the type cvar is "MCDRAM", the POLICY/RULE/PROC cvars are
 *     used and the info object is ignored.
 *   - Any other combination selects the default policy with NOCPUBIND.
 *
 * Parameters:
 *   info    - info object carrying the hints; may be NULL.
 *   baseptr - address of the variable holding the window base address (it is
 *             dereferenced as a void **).
 *   len     - length in bytes of the region to bind.
 *
 * The function does nothing when built without hwloc or when
 * MPIR_Process.mcdramset_is_valid is false. The result of
 * hwloc_set_area_membind() is not checked, so a failed binding is silent.
 */
void win_set_mempolicy(MPIR_Info * info, void *baseptr, size_t len)
{
#ifdef HAVE_HWLOC
    int info_flag;
    int flags = HWLOC_MEMBIND_BYNODESET;
    char info_value[MPI_MAX_INFO_VAL + 1];
    /* Start from the default policy: several paths below (missing
     * "shm_win_bind_type" or "shm_win_bind_policy" keys) never assign a
     * policy, and it must not reach hwloc uninitialized. */
    hwloc_membind_policy_t policy = HWLOC_MEMBIND_DEFAULT;

    if (strcmp(MPIR_CVAR_CH3_SHMEM_WIN_BIND_TYPE, "DEFAULT") == 0 && info) {    /* no ENV set */
        MPIR_Info_get_impl(info, "shm_win_bind_type", MPI_MAX_INFO_VAL, info_value, &info_flag);
        if (info_flag) {
            if (strcmp(info_value, "MCDRAM") == 0) {
                /* info_value is reused for each subsequent key lookup */
                MPIR_Info_get_impl(info, "shm_win_bind_policy", MPI_MAX_INFO_VAL, info_value,
                                   &info_flag);
                if (info_flag)
                    policy = win_membind_policy_from_name(info_value);

                MPIR_Info_get_impl(info, "shm_win_bind_rule", MPI_MAX_INFO_VAL, info_value,
                                   &info_flag);
                if (info_flag && strcmp(info_value, "STRICT") == 0)
                    flags |= HWLOC_MEMBIND_STRICT;

                MPIR_Info_get_impl(info, "shm_win_bind_proc", MPI_MAX_INFO_VAL, info_value,
                                   &info_flag);
                if (info_flag)
                    flags |= win_membind_proc_flag_from_name(info_value);
            } else {
                policy = HWLOC_MEMBIND_DEFAULT;
                flags |= HWLOC_MEMBIND_NOCPUBIND;
            }
        }
    } else if (strcmp(MPIR_CVAR_CH3_SHMEM_WIN_BIND_TYPE, "MCDRAM") == 0) {
        /* The environment takes precedence: info hints are not consulted */
        policy = win_membind_policy_from_name(MPIR_CVAR_CH3_SHMEM_WIN_BIND_POLICY);

        if (strcmp(MPIR_CVAR_CH3_SHMEM_WIN_BIND_RULE, "STRICT") == 0)
            flags |= HWLOC_MEMBIND_STRICT;

        flags |= win_membind_proc_flag_from_name(MPIR_CVAR_CH3_SHMEM_WIN_BIND_PROC);
    } else {
        policy = HWLOC_MEMBIND_DEFAULT;
        flags |= HWLOC_MEMBIND_NOCPUBIND;
    }

    if (MPIR_Process.mcdramset_is_valid)
        hwloc_set_area_membind(MPIR_Process.topology, *(void **) baseptr, len,
                               MPIR_Process.bindset_is_valid ?
                               MPIR_Process.mcdram_sibling : MPIR_Process.mcdram_nodes,
                               policy, flags);
#endif
}

0 comments on commit d0a4ae1

Please sign in to comment.