Skip to content

Commit

Permalink
Updates the PSM2 library to match release 11.2.173
Browse files Browse the repository at this point in the history
Fixes include:

Improved handling of the CUDA memory handle cache.

Improved handling of the case where the libpsm2 library is opened
multiple times by a single process. (This can happen when, for example,
multiple Open MPI transport layers all access libpsm2.)

Signed-off-by: Michael Heinz <[email protected]>
  • Loading branch information
Michael Heinz committed May 13, 2020
1 parent b2aa520 commit be11661
Show file tree
Hide file tree
Showing 13 changed files with 344 additions and 151 deletions.
2 changes: 1 addition & 1 deletion COMMIT
Original file line number Diff line number Diff line change
@@ -1 +1 @@
bc17e0522f6b64e8e054d3cfea4506ac155724c2
61553edd6b9fefce4a246b4701abc213e7c36b2b
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ dist: distclean
PRUNE_LIST=""; \
for pd in ".git" "cscope*" "$(shell realpath --relative-to=${top_srcdir} ${OUTDIR})" \
"*.orig" "*~" "#*" ".gitignore" "doc" "libcm" "psm.supp" "test" "psm_hal_MOCK" \
"tools" "artifacts" "*.rej.patch"; do \
"psm_test" "tools" "artifacts" "*.rej.patch"; do \
PRUNE_LIST="$$PRUNE_LIST -name $$pd -prune -o"; \
done; \
for hid in psm_hal_* ; do \
Expand Down
7 changes: 7 additions & 0 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,13 @@ Note: It is also possible to use rpm command to install rpm's, but it is recomme
that one use yum/dnf as rpm tool has issues with name changes and obsoletes tags.
yum or dnf should be better able to resolve dependency issues.

TESTING
=======

Please see the subdirectory psm_test, starting with the file: psm_test/README for
code and instructions on testing the psm2 library. Additionally, the
directory: psm_test/samples contains sample code to test.

RELATED SOFTWARE TO PSM2
========================

Expand Down
55 changes: 53 additions & 2 deletions include/rbtree.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,22 @@

#include <string.h> /* for memset declaration */

#if !defined ( RBTREE_GET_LEFTMOST ) || \
// RBTREE_CMP should be a comparator, i.e. RBTREE_CMP(a, b) should evaluate to
// -1, 0, or 1 depending on if a < b, a == b, or a > b, respectively.
#ifdef RBTREE_CMP

#if defined(RBTREE_GET_LEFTMOST) || defined(RBTREE_GET_RIGHTMOST)
#error Cannot define both RBTREE_CMP and RBTREE_GET_(LEFT|RIGHT)MOST
#endif

#elif !defined ( RBTREE_GET_LEFTMOST ) || \
! defined ( RBTREE_GET_RIGHTMOST ) || \
! defined ( RBTREE_MAP_COUNT ) || \
! defined ( RBTREE_ASSERT )
#error "You must define RBTREE_GET_LEFTMOST and RBTREE_GET_RIGHTMOST and \
RBTREE_MAP_COUNT and RBTREE_ASSERT before including rbtree.c"
#endif

#endif /* RBTREE_CMP */

#define IN /* nothing */

Expand All @@ -117,13 +126,24 @@ static void ips_cl_qmap_remove_item(
static cl_map_item_t* ips_cl_qmap_successor(
IN cl_qmap_t* const p_map,
IN const cl_map_item_t* p_item);


#ifndef RBTREE_NO_EMIT_IPS_CL_QMAP_PREDECESSOR
static cl_map_item_t* ips_cl_qmap_predecessor(
IN cl_qmap_t* const p_map,
IN const cl_map_item_t* p_item);
#endif

#if defined(RBTREE_GET_LEFTMOST)
static cl_map_item_t* ips_cl_qmap_search(
IN cl_qmap_t* const p_map,
IN unsigned long start,
IN unsigned long end);
#else
static cl_map_item_t* ips_cl_qmap_searchv(
cl_qmap_t* const p_map,
const RBTREE_MI_PL *key);
#endif

/*
* Get the root.
Expand Down Expand Up @@ -380,7 +400,11 @@ ips_cl_qmap_insert_item(
p_insert_at = p_comp_item;

/* Traverse the tree until the correct insertion point is found. */
#ifdef RBTREE_GET_LEFTMOST
if( RBTREE_GET_LEFTMOST(&p_item->payload) < RBTREE_GET_LEFTMOST(&p_insert_at->payload) )
#else
if(RBTREE_CMP(&p_item->payload, &p_insert_at->payload) < 0)
#endif
{
p_comp_item = p_insert_at->p_left;
compare_res = 1;
Expand Down Expand Up @@ -604,6 +628,11 @@ ips_cl_qmap_successor(
}
}

// When the includer defines RBTREE_CMP, ips_cl_qmap_search() is not emitted.
// In that case ips_cl_qmap_predecessor() may have no callers, and with
// -Werror -Wunused-function libpsm2 then fails to build. Define
// RBTREE_NO_EMIT_IPS_CL_QMAP_PREDECESSOR to suppress emitting this function.
#ifndef RBTREE_NO_EMIT_IPS_CL_QMAP_PREDECESSOR
static cl_map_item_t *
ips_cl_qmap_predecessor(
IN cl_qmap_t* const p_map,
Expand All @@ -627,7 +656,9 @@ ips_cl_qmap_predecessor(
return p_tmp;
}
}
#endif /* RBTREE_NO_EMIT_IPS_CL_QMAP_PREDECESSOR */

#if defined(RBTREE_GET_LEFTMOST)
/*
* return the first node with buffer overlapping or zero.
*/
Expand Down Expand Up @@ -690,3 +721,23 @@ ips_cl_qmap_search(cl_qmap_t * const p_map,

return p_item;
}
#else /* defined(...LEFTMOST) || defined(...RIGHTMOST) */
/*
 * Look up the item whose payload compares equal to *key.
 *
 * Descends from the root of p_map, moving to the right child when
 * RBTREE_CMP(key, payload) > 0 and to the left child when it is < 0.
 * RBTREE_CMP is evaluated exactly once per visited node.
 *
 * Returns the matching item, or p_map->nil_item when no payload in the
 * map compares equal to *key.
 */
static cl_map_item_t *
ips_cl_qmap_searchv(cl_qmap_t * const p_map, const RBTREE_MI_PL *key)
{
	RBTREE_ASSERT( p_map );
	cl_map_item_t *p_item = __cl_map_root(p_map);

	while (p_item != p_map->nil_item) {
		/* Compare once and branch on the cached result. */
		const int cmp = RBTREE_CMP(key, &p_item->payload);

		if (cmp > 0) {
			p_item = p_item->p_right;
		} else if (cmp < 0) {
			p_item = p_item->p_left;
		} else {
			break;
		}
	}

	return p_item;
}
#endif /* defined(...LEFTMOST) || defined(...RIGHTMOST) */
36 changes: 24 additions & 12 deletions psm.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,14 @@ static int psmi_verno = PSMI_VERNO_MAKE(PSM2_VERNO_MAJOR, PSM2_VERNO_MINOR);
static int psmi_verno_client_val;
int psmi_epid_ver;

// Special psmi_refcount values
#define PSMI_NOT_INITIALIZED 0
#define PSMI_INITIALIZED 1
#define PSMI_FINALIZED -1 /* Prevent the user from calling psm2_init
* once psm_finalize has been called. */
static int psmi_isinit = PSMI_NOT_INITIALIZED;
#define PSMI_FINALIZED -1

// PSM2 doesn't support transitioning out of the PSMI_FINALIZED state.
// Once psmi_refcount is set to PSMI_FINALIZED, any further attempt to change
// psmi_refcount should be treated as an error.
static int psmi_refcount = PSMI_NOT_INITIALIZED;

/* Global lock used for endpoint creation and destroy
* (in functions psm2_ep_open and psm2_ep_close) and also
Expand Down Expand Up @@ -104,9 +107,8 @@ uint32_t gdr_copy_threshold_recv;
* It is supposed to be filled with logical OR
* on conditional compilation basis
* along with future features/capabilities.
* At the very beginning we start with Multi EPs.
*/
uint64_t psm2_capabilities_bitset = PSM2_MULTI_EP_CAP;
uint64_t psm2_capabilities_bitset = PSM2_MULTI_EP_CAP | PSM2_LIB_REFCOUNT_CAP;

int psmi_verno_client()
{
Expand All @@ -130,7 +132,7 @@ int psmi_verno_isinteroperable(uint16_t verno)

int MOCKABLE(psmi_isinitialized)()
{
return (psmi_isinit == PSMI_INITIALIZED);
return (psmi_refcount > 0);
}
MOCK_DEF_EPILOGUE(psmi_isinitialized);

Expand Down Expand Up @@ -356,10 +358,12 @@ psm2_error_t __psm2_init(int *major, int *minor)
GENERIC_PERF_SET_SLOT_NAME(PSM_TX_SPEEDPATH_CTR, "TX");
GENERIC_PERF_SET_SLOT_NAME(PSM_RX_SPEEDPATH_CTR, "RX");

if (psmi_isinit == PSMI_INITIALIZED)
if (psmi_refcount > 0) {
psmi_refcount++;
goto update;
}

if (psmi_isinit == PSMI_FINALIZED) {
if (psmi_refcount == PSMI_FINALIZED) {
err = PSM2_IS_FINALIZED;
goto fail;
}
Expand Down Expand Up @@ -435,7 +439,7 @@ psm2_error_t __psm2_init(int *major, int *minor)
((id.eax & CPUID_EXMODEL_MASK) >> 12);
}

psmi_isinit = PSMI_INITIALIZED;
psmi_refcount++;
/* hfi_debug lives in libhfi.so */
psmi_getenv("PSM2_TRACEMASK",
"Mask flags for tracing",
Expand Down Expand Up @@ -520,7 +524,6 @@ psm2_error_t __psm2_init(int *major, int *minor)
#endif

update:

if (getenv("PSM2_IDENTIFY")) {
Dl_info info_psm;
char ofed_delta[100] = "";
Expand Down Expand Up @@ -557,6 +560,8 @@ psm2_error_t __psm2_init(int *major, int *minor)
*major = (int)psmi_verno_major;
*minor = (int)psmi_verno_minor;
fail:
_HFI_DBG("psmi_refcount=%d,err=%u\n", psmi_refcount, err);

PSM2_LOG_MSG("leaving");
return err;
}
Expand Down Expand Up @@ -779,7 +784,14 @@ psm2_error_t __psm2_finalize(void)

PSM2_LOG_MSG("entering");

_HFI_DBG("psmi_refcount=%d\n", psmi_refcount);
PSMI_ERR_UNLESS_INITIALIZED(NULL);
psmi_assert(psmi_refcount > 0);
psmi_refcount--;

if (psmi_refcount > 0) {
return PSM2_OK;
}

/* When PSM_PERF is enabled, the following line causes the
instruction cycles gathered in the current run to be dumped
Expand Down Expand Up @@ -856,7 +868,7 @@ psm2_error_t __psm2_finalize(void)
}
#endif

psmi_isinit = PSMI_FINALIZED;
psmi_refcount = PSMI_FINALIZED;
PSM2_LOG_MSG("leaving");
psmi_log_fini();

Expand Down
1 change: 1 addition & 0 deletions psm2.h
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,7 @@ psm2_error_t psm2_init(int *api_verno_major, int *api_verno_minor);
* consecutive bits : 0x2, 0x4 ... and so on.
*/
#define PSM2_MULTI_EP_CAP 0x1 /* Multiple Endpoints capability */
#define PSM2_LIB_REFCOUNT_CAP 0x2 /* Library finalization is managed with reference count */

/** @brief PSM2 capabilities provider
*
Expand Down
12 changes: 4 additions & 8 deletions psm2_hal.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,14 +326,10 @@ static struct _psmi_hal_instance *psmi_hal_get_pi_inst(void)
p->params.num_ports = nports;
p->params.default_pkey = dflt_pkey;
p->params.sw_status |= valid_flags;
p->params.unit_active = (uint8_t *) psmi_calloc(PSMI_EP_NONE, UNDEFINED, nunits,
sizeof(uint8_t));
p->params.unit_active_valid = (uint8_t *) psmi_calloc(PSMI_EP_NONE, UNDEFINED, nunits,
sizeof(uint8_t));
p->params.port_active = (uint8_t *) psmi_calloc(PSMI_EP_NONE, UNDEFINED, nunits*nports,
sizeof(uint8_t));
p->params.port_active_valid = (uint8_t *) psmi_calloc(PSMI_EP_NONE, UNDEFINED, nunits*nports,
sizeof(uint8_t));
p->params.unit_active = (int8_t *) psmi_calloc(PSMI_EP_NONE, UNDEFINED, nunits, sizeof(int8_t));
p->params.unit_active_valid = (int8_t *) psmi_calloc(PSMI_EP_NONE, UNDEFINED, nunits, sizeof(int8_t));
p->params.port_active = (int8_t *) psmi_calloc(PSMI_EP_NONE, UNDEFINED, nunits*nports, sizeof(int8_t));
p->params.port_active_valid = (int8_t *) psmi_calloc(PSMI_EP_NONE, UNDEFINED, nunits*nports, sizeof(int8_t));
p->params.num_contexts = (uint16_t *) psmi_calloc(PSMI_EP_NONE, UNDEFINED, nunits,
sizeof(uint16_t));
p->params.num_contexts_valid = (uint16_t *) psmi_calloc(PSMI_EP_NONE, UNDEFINED, nunits,
Expand Down
4 changes: 2 additions & 2 deletions psm2_hal.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ typedef struct _psmi_hal_params
uint16_t num_units;
uint16_t num_ports;
uint16_t default_pkey;
uint8_t *unit_active,*unit_active_valid;
uint8_t *port_active,*port_active_valid;
int8_t *unit_active,*unit_active_valid;
int8_t *port_active,*port_active_valid;
uint16_t *num_contexts,*num_contexts_valid;
uint16_t *num_free_contexts,*num_free_contexts_valid;
} psmi_hal_params_t;
Expand Down
30 changes: 30 additions & 0 deletions psm_user.h
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,36 @@ CUresult (*psmi_cuDevicePrimaryCtxRelease)(CUdevice device);
} \
} while (0)

/**
 * Similar to PSMI_CUDA_CALL(), except that it does not error out
 * when func(args) returns CUDA_SUCCESS or except_err.
 *
 * The invoker must provide 'CUresult cudaerr' in the invoking scope
 * so the invoker can inspect whether cudaerr == CUDA_SUCCESS or
 * cudaerr == except_err after the expanded code has executed.
 * The expansion also reads 'ctxt' from the invoking scope.
 *
 * Any other result is reported through _HFI_ERROR and handed to
 * psmi_handle_error(PSMI_EP_NORETURN, ...); a result equal to except_err
 * is only reported through _HFI_INFO.
 *
 * Note: except_err is evaluated more than once; pass a side-effect-free
 * expression.
 */
#define PSMI_CUDA_CALL_EXCEPT(except_err, func, args...) do { \
		cudaerr = psmi_##func(args); \
		if (cudaerr != CUDA_SUCCESS && cudaerr != (except_err)) { \
			if (ctxt == NULL) \
				_HFI_ERROR( \
				"Check if CUDA is initialized " \
				"before psm2_ep_open call \n"); \
			_HFI_ERROR( \
				"CUDA failure: %s() (at %s:%d) " \
				"returned %d\n", \
				#func, __FILE__, __LINE__, cudaerr); \
			psmi_handle_error( \
				PSMI_EP_NORETURN, PSM2_INTERNAL_ERR, \
				"Error returned from CUDA function.\n");\
		} else if (cudaerr == (except_err)) { \
			_HFI_INFO( \
				"CUDA warning: %s() (at %s:%d) " \
				"returned %d\n", \
				#func, __FILE__, __LINE__, cudaerr); \
		} \
	} while (0)

#define PSMI_CUDA_CHECK_EVENT(event, cudaerr) do { \
cudaerr = psmi_cuEventQuery(event); \
if ((cudaerr != CUDA_SUCCESS) && \
Expand Down
Loading

0 comments on commit be11661

Please sign in to comment.