From d7053a306a74d71b8bc5a05782f3125727d20f6c Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Thu, 29 Nov 2018 11:38:07 +0900 Subject: [PATCH 01/94] btl/openib: delay UCX warning to add_procs() If UCX is available, then pml/ucx will be used instead of pml/ob1 + btl/openib, so there is no need to warn about btl/openib not supporting Infiniband. Signed-off-by: Gilles Gouaillardet (cherry picked from commit open-mpi/ompi@0a2ce580405ae86788e5f0e7d5264fce162e73c8) --- opal/mca/btl/openib/btl_openib.c | 130 +++++++++-------- opal/mca/btl/openib/btl_openib.h | 9 +- opal/mca/btl/openib/btl_openib_component.c | 154 ++++++++++++--------- opal/mca/btl/openib/btl_openib_proc.c | 5 +- 4 files changed, 171 insertions(+), 127 deletions(-) diff --git a/opal/mca/btl/openib/btl_openib.c b/opal/mca/btl/openib/btl_openib.c index dc279df8347..a3bc12190e7 100644 --- a/opal/mca/btl/openib/btl_openib.c +++ b/opal/mca/btl/openib/btl_openib.c @@ -19,8 +19,8 @@ * Copyright (c) 2009 IBM Corporation. All rights reserved. * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2013-2015 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved * $COPYRIGHT$ * @@ -1040,6 +1040,14 @@ int mca_btl_openib_add_procs( int btl_rank = 0; volatile mca_btl_base_endpoint_t* endpoint; + + if (! 
openib_btl->allowed) { + opal_bitmap_clear_all_bits(reachable); + opal_show_help("help-mpi-btl-openib.txt", "ib port not selected", + true, opal_process_info.nodename, + ibv_get_device_name(openib_btl->device->ib_dev), openib_btl->port_num); + } + btl_rank = get_openib_btl_params(openib_btl, &lcl_subnet_id_port_cnt); if( 0 > btl_rank ){ return OPAL_ERR_NOT_FOUND; @@ -1639,75 +1647,77 @@ static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl) return OPAL_SUCCESS; } - /* Release all QPs */ - if (NULL != openib_btl->device->endpoints) { - for (ep_index=0; - ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints); - ep_index++) { - endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints, + if (openib_btl->allowed) { + /* Release all QPs */ + if (NULL != openib_btl->device->endpoints) { + for (ep_index=0; + ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints); + ep_index++) { + endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints, ep_index); - if(!endpoint) { - BTL_VERBOSE(("In finalize, got another null endpoint")); - continue; - } - if(endpoint->endpoint_btl != openib_btl) { - continue; - } - for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) { - if(openib_btl->device->eager_rdma_buffers[i] == endpoint) { - openib_btl->device->eager_rdma_buffers[i] = NULL; - OBJ_RELEASE(endpoint); + if(!endpoint) { + BTL_VERBOSE(("In finalize, got another null endpoint")); + continue; } + if(endpoint->endpoint_btl != openib_btl) { + continue; + } + for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) { + if(openib_btl->device->eager_rdma_buffers[i] == endpoint) { + openib_btl->device->eager_rdma_buffers[i] = NULL; + OBJ_RELEASE(endpoint); + } + } + opal_pointer_array_set_item(openib_btl->device->endpoints, + ep_index, NULL); + assert(((opal_object_t*)endpoint)->obj_reference_count == 1); + OBJ_RELEASE(endpoint); } - 
opal_pointer_array_set_item(openib_btl->device->endpoints, - ep_index, NULL); - assert(((opal_object_t*)endpoint)->obj_reference_count == 1); - OBJ_RELEASE(endpoint); } - } - - /* Release SRQ resources */ - for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) { - if(!BTL_OPENIB_QP_TYPE_PP(qp)) { - MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS( - &openib_btl->qps[qp].u.srq_qp.pending_frags[0]); - MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS( - &openib_btl->qps[qp].u.srq_qp.pending_frags[1]); - if (NULL != openib_btl->qps[qp].u.srq_qp.srq) { - opal_mutex_t *lock = - &mca_btl_openib_component.srq_manager.lock; - opal_hash_table_t *srq_addr_table = - &mca_btl_openib_component.srq_manager.srq_addr_table; - - opal_mutex_lock(lock); - if (OPAL_SUCCESS != - opal_hash_table_remove_value_ptr(srq_addr_table, - &openib_btl->qps[qp].u.srq_qp.srq, - sizeof(struct ibv_srq *))) { - BTL_VERBOSE(("Failed to remove SRQ %d entry from hash table.", qp)); - rc = OPAL_ERROR; - } - opal_mutex_unlock(lock); - if (0 != ibv_destroy_srq(openib_btl->qps[qp].u.srq_qp.srq)) { - BTL_VERBOSE(("Failed to close SRQ %d", qp)); - rc = OPAL_ERROR; + /* Release SRQ resources */ + for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) { + if(!BTL_OPENIB_QP_TYPE_PP(qp)) { + MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS( + &openib_btl->qps[qp].u.srq_qp.pending_frags[0]); + MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS( + &openib_btl->qps[qp].u.srq_qp.pending_frags[1]); + if (NULL != openib_btl->qps[qp].u.srq_qp.srq) { + opal_mutex_t *lock = + &mca_btl_openib_component.srq_manager.lock; + + opal_hash_table_t *srq_addr_table = + &mca_btl_openib_component.srq_manager.srq_addr_table; + + opal_mutex_lock(lock); + if (OPAL_SUCCESS != + opal_hash_table_remove_value_ptr(srq_addr_table, + &openib_btl->qps[qp].u.srq_qp.srq, + sizeof(struct ibv_srq *))) { + BTL_VERBOSE(("Failed to remove SRQ %d entry from hash table.", qp)); + rc = OPAL_ERROR; + } + opal_mutex_unlock(lock); + if (0 != ibv_destroy_srq(openib_btl->qps[qp].u.srq_qp.srq)) { + 
BTL_VERBOSE(("Failed to close SRQ %d", qp)); + rc = OPAL_ERROR; + } } - } - OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[0]); - OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[1]); + OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[0]); + OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[1]); + } } - } - /* Finalize the CPC modules on this openib module */ - for (i = 0; i < openib_btl->num_cpcs; ++i) { - if (NULL != openib_btl->cpcs[i]->cbm_finalize) { - openib_btl->cpcs[i]->cbm_finalize(openib_btl, openib_btl->cpcs[i]); + /* Finalize the CPC modules on this openib module */ + for (i = 0; i < openib_btl->num_cpcs; ++i) { + if (NULL != openib_btl->cpcs[i]->cbm_finalize) { + openib_btl->cpcs[i]->cbm_finalize(openib_btl, openib_btl->cpcs[i]); + } + free(openib_btl->cpcs[i]); } - free(openib_btl->cpcs[i]); + free(openib_btl->cpcs); } - free(openib_btl->cpcs); /* Release device if there are no more users */ if(!(--openib_btl->device->btls)) { diff --git a/opal/mca/btl/openib/btl_openib.h b/opal/mca/btl/openib/btl_openib.h index 6b4dd0466bf..a5817a8daee 100644 --- a/opal/mca/btl/openib/btl_openib.h +++ b/opal/mca/btl/openib/btl_openib.h @@ -18,8 +18,8 @@ * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013-2014 NVIDIA Corporation. All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. - * Copyright (c) 2015-2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -164,6 +164,9 @@ struct mca_btl_openib_component_t { int ib_num_btls; /**< number of devices available to the openib component */ + int ib_allowed_btls; + /**< number of devices allowed to the openib component */ + struct mca_btl_openib_module_t **openib_btls; /**< array of available BTLs */ @@ -501,6 +504,8 @@ struct mca_btl_openib_module_t { int local_procs; /** number of local procs */ bool atomic_ops_be; /** atomic result is big endian */ + + bool allowed; /** is this port allowed */ }; typedef struct mca_btl_openib_module_t mca_btl_openib_module_t; diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c index 5c7cce7b57b..4a714b4d1b3 100644 --- a/opal/mca/btl/openib/btl_openib_component.c +++ b/opal/mca/btl/openib/btl_openib_component.c @@ -19,8 +19,8 @@ * Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. * $COPYRIGHT$ * @@ -269,7 +269,7 @@ static int btl_openib_modex_send(void) /* uint8_t for number of modules in the message */ 1 + /* For each module: */ - mca_btl_openib_component.ib_num_btls * + mca_btl_openib_component.ib_allowed_btls * ( /* Common module data */ modex_message_size + @@ -278,6 +278,9 @@ static int btl_openib_modex_send(void) ); /* For each module, add in the size of the per-CPC data */ for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) { + if (! 
mca_btl_openib_component.openib_btls[i]->allowed) { + continue; + } for (j = 0; j < mca_btl_openib_component.openib_btls[i]->num_cpcs; ++j) { @@ -300,12 +303,15 @@ static int btl_openib_modex_send(void) /* Pack the number of modules */ offset = message; - pack8(&offset, mca_btl_openib_component.ib_num_btls); - opal_output(-1, "modex sending %d btls (packed: %d, offset now at %d)", mca_btl_openib_component.ib_num_btls, *((uint8_t*) message), (int) (offset - message)); + pack8(&offset, mca_btl_openib_component.ib_allowed_btls); + opal_output(-1, "modex sending %d btls (packed: %d, offset now at %d)", mca_btl_openib_component.ib_allowed_btls, *((uint8_t*) message), (int) (offset - message)); /* Pack each of the modules */ for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) { + if (! mca_btl_openib_component.openib_btls[i]->allowed) { + continue; + } /* Pack the modex common message struct. */ size = modex_message_size; @@ -628,22 +634,35 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, * policy. For ancient OFED, only allow if user has set * the MCA parameter. */ + if (! 
mca_btl_openib_component.allow_ib #if HAVE_DECL_IBV_LINK_LAYER_ETHERNET - if ((IBV_LINK_LAYER_INFINIBAND == ib_port_attr->link_layer) && - (false == mca_btl_openib_component.allow_ib)) { - opal_show_help("help-mpi-btl-openib.txt", "ib port not selected", - true, opal_process_info.nodename, - ibv_get_device_name(device->ib_dev), port_num); - return OPAL_ERR_NOT_FOUND; - } -#else - if (false == mca_btl_openib_component.allow_ib) { - opal_show_help("help-mpi-btl-openib.txt", "ib port not selected", - true, opal_process_info.nodename, - ibv_get_device_name(device->ib_dev), port_num); - return OPAL_ERR_NOT_FOUND; - } + && IBV_LINK_LAYER_INFINIBAND == ib_port_attr->link_layer #endif + ) { + openib_btl = (mca_btl_openib_module_t *) calloc(1, sizeof(mca_btl_openib_module_t)); + if(NULL == openib_btl) { + BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__)); + return OPAL_ERR_OUT_OF_RESOURCE; + } + memcpy(openib_btl, &mca_btl_openib_module, + sizeof(mca_btl_openib_module)); + ib_selected = OBJ_NEW(mca_btl_base_selected_module_t); + ib_selected->btl_module = (mca_btl_base_module_t*) openib_btl; + openib_btl->device = device; + openib_btl->port_num = (uint8_t) port_num; + openib_btl->allowed = false; + OBJ_CONSTRUCT(&openib_btl->ib_lock, opal_mutex_t); + opal_list_append(btl_list, (opal_list_item_t*) ib_selected); + opal_pointer_array_add(device->device_btls, (void*) openib_btl); + ++device->btls; + ++mca_btl_openib_component.ib_num_btls; + if (-1 != mca_btl_openib_component.ib_max_btls && + mca_btl_openib_component.ib_num_btls >= + mca_btl_openib_component.ib_max_btls) { + return OPAL_ERR_VALUE_OUT_OF_BOUNDS; + } + return OPAL_SUCCESS; + } /* Ensure that the requested GID index (via the @@ -880,10 +899,13 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, } } + openib_btl->allowed = true; + opal_list_append(btl_list, (opal_list_item_t*) ib_selected); opal_pointer_array_add(device->device_btls, (void*) openib_btl); ++device->btls; 
++mca_btl_openib_component.ib_num_btls; + ++mca_btl_openib_component.ib_allowed_btls; if (-1 != mca_btl_openib_component.ib_max_btls && mca_btl_openib_component.ib_num_btls >= mca_btl_openib_component.ib_max_btls) { @@ -2912,36 +2934,38 @@ btl_openib_component_init(int *num_btl_modules, goto no_btls; } - /* Now that we know we have devices and ports that we want to use, - init CPC components */ - if (OPAL_SUCCESS != (ret = opal_btl_openib_connect_base_init())) { - goto no_btls; - } + if (0 < mca_btl_openib_component.ib_allowed_btls) { + /* Now that we know we have devices and ports that we want to use, + init CPC components */ + if (OPAL_SUCCESS != (ret = opal_btl_openib_connect_base_init())) { + goto no_btls; + } - /* Setup the BSRQ QP's based on the final value of - mca_btl_openib_component.receive_queues. */ - if (OPAL_SUCCESS != setup_qps()) { - goto no_btls; - } - if (mca_btl_openib_component.num_srq_qps > 0 || - mca_btl_openib_component.num_xrc_qps > 0) { - opal_hash_table_t *srq_addr_table = &mca_btl_openib_component.srq_manager.srq_addr_table; - if(OPAL_SUCCESS != opal_hash_table_init( - srq_addr_table, (mca_btl_openib_component.num_srq_qps + - mca_btl_openib_component.num_xrc_qps) * - mca_btl_openib_component.ib_num_btls)) { - BTL_ERROR(("SRQ internal error. Failed to allocate SRQ addr hash table")); + /* Setup the BSRQ QP's based on the final value of + mca_btl_openib_component.receive_queues. */ + if (OPAL_SUCCESS != setup_qps()) { goto no_btls; } - } + if (mca_btl_openib_component.num_srq_qps > 0 || + mca_btl_openib_component.num_xrc_qps > 0) { + opal_hash_table_t *srq_addr_table = &mca_btl_openib_component.srq_manager.srq_addr_table; + if(OPAL_SUCCESS != opal_hash_table_init( + srq_addr_table, (mca_btl_openib_component.num_srq_qps + + mca_btl_openib_component.num_xrc_qps) * + mca_btl_openib_component.ib_num_btls)) { + BTL_ERROR(("SRQ internal error. 
Failed to allocate SRQ addr hash table")); + goto no_btls; + } + } - /* For XRC: - * from this point we know if MCA_BTL_XRC_ENABLED it true or false */ + /* For XRC: + * from this point we know if MCA_BTL_XRC_ENABLED it true or false */ - /* Init XRC IB Addr hash table */ - if (MCA_BTL_XRC_ENABLED) { - OBJ_CONSTRUCT(&mca_btl_openib_component.ib_addr_table, - opal_hash_table_t); + /* Init XRC IB Addr hash table */ + if (MCA_BTL_XRC_ENABLED) { + OBJ_CONSTRUCT(&mca_btl_openib_component.ib_addr_table, + opal_hash_table_t); + } } /* Allocate space for btl modules */ @@ -2967,31 +2991,34 @@ btl_openib_component_init(int *num_btl_modules, ib_selected = (mca_btl_base_selected_module_t*)item; openib_btl = (mca_btl_openib_module_t*)ib_selected->btl_module; - /* Search for a CPC that can handle this port */ - ret = opal_btl_openib_connect_base_select_for_local_port(openib_btl); - /* If we get NOT_SUPPORTED, then no CPC was found for this - port. But that's not a fatal error -- just keep going; - let's see if we find any usable openib modules or not. */ - if (OPAL_ERR_NOT_SUPPORTED == ret) { - continue; - } else if (OPAL_SUCCESS != ret) { - /* All others *are* fatal. Note that we already did a - show_help in the lower layer */ - goto no_btls; - } + if (openib_btl->allowed) { + /* Search for a CPC that can handle this port */ + ret = opal_btl_openib_connect_base_select_for_local_port(openib_btl); + /* If we get NOT_SUPPORTED, then no CPC was found for this + port. But that's not a fatal error -- just keep going; + let's see if we find any usable openib modules or not. */ + if (OPAL_ERR_NOT_SUPPORTED == ret) { + continue; + } else if (OPAL_SUCCESS != ret) { + /* All others *are* fatal. 
Note that we already did a + show_help in the lower layer */ + goto no_btls; + } - if (mca_btl_openib_component.max_hw_msg_size > 0 && - (uint32_t)mca_btl_openib_component.max_hw_msg_size > openib_btl->ib_port_attr.max_msg_sz) { - BTL_ERROR(("max_hw_msg_size (%" PRIu32 ") is larger than hw max message size (%" PRIu32 ")", - mca_btl_openib_component.max_hw_msg_size, openib_btl->ib_port_attr.max_msg_sz)); + if (mca_btl_openib_component.max_hw_msg_size > 0 && + (uint32_t)mca_btl_openib_component.max_hw_msg_size > openib_btl->ib_port_attr.max_msg_sz) { + BTL_ERROR(("max_hw_msg_size (%" PRIu32 ") is larger than hw max message size (%" PRIu32 ")", + mca_btl_openib_component.max_hw_msg_size, openib_btl->ib_port_attr.max_msg_sz)); + } + + if (finish_btl_init(openib_btl) != OPAL_SUCCESS) { + goto no_btls; + } } mca_btl_openib_component.openib_btls[i] = openib_btl; OBJ_RELEASE(ib_selected); btls[i] = &openib_btl->super; - if (finish_btl_init(openib_btl) != OPAL_SUCCESS) { - goto no_btls; - } ++i; } /* If we got nothing, then error out */ @@ -3039,6 +3066,7 @@ btl_openib_component_init(int *num_btl_modules, there are no openib BTL's in this process and return NULL. */ mca_btl_openib_component.ib_num_btls = 0; + mca_btl_openib_component.ib_allowed_btls = 0; btl_openib_modex_send(); if (NULL != btls) { free(btls); diff --git a/opal/mca/btl/openib/btl_openib_proc.c b/opal/mca/btl/openib/btl_openib_proc.c index a4b77fa6436..9e891fb55cb 100644 --- a/opal/mca/btl/openib/btl_openib_proc.c +++ b/opal/mca/btl/openib/btl_openib_proc.c @@ -13,8 +13,8 @@ * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2015-2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* Copyright (c) 2015 Mellanox Technologies. All rights reserved. * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights * reserved. @@ -277,6 +277,7 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc) if (0 == ib_proc->proc_port_count) { ib_proc->proc_endpoints = NULL; + goto no_err_exit; } else { ib_proc->proc_endpoints = (volatile mca_btl_base_endpoint_t**) malloc(ib_proc->proc_port_count * From c58c7749815488af1128bbd29e930be9f51ebbfa Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Tue, 11 Dec 2018 09:07:22 +0900 Subject: [PATCH 02/94] btl/openib: have add_proc() return immediately when the port is disabled. Fixes an issue introduced in open-mpi/ompi@0a2ce580405ae86788e5f0e7d5264fce162e73c8 This is a one-off commit for the v4.0.x branch since btl/openib has been removed from master. Refs. open-mpi/ompi#6137 Signed-off-by: Gilles Gouaillardet --- opal/mca/btl/openib/btl_openib.c | 1 + opal/mca/btl/openib/btl_openib_proc.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/opal/mca/btl/openib/btl_openib.c b/opal/mca/btl/openib/btl_openib.c index a3bc12190e7..3bd5fe965da 100644 --- a/opal/mca/btl/openib/btl_openib.c +++ b/opal/mca/btl/openib/btl_openib.c @@ -1046,6 +1046,7 @@ int mca_btl_openib_add_procs( opal_show_help("help-mpi-btl-openib.txt", "ib port not selected", true, opal_process_info.nodename, ibv_get_device_name(openib_btl->device->ib_dev), openib_btl->port_num); + return OPAL_SUCCESS; } btl_rank = get_openib_btl_params(openib_btl, &lcl_subnet_id_port_cnt); diff --git a/opal/mca/btl/openib/btl_openib_proc.c b/opal/mca/btl/openib/btl_openib_proc.c index 9e891fb55cb..8f41b9696ad 100644 --- a/opal/mca/btl/openib/btl_openib_proc.c +++ b/opal/mca/btl/openib/btl_openib_proc.c @@ -277,7 +277,6 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc) if (0 == ib_proc->proc_port_count) { ib_proc->proc_endpoints = NULL; - goto no_err_exit; } else { ib_proc->proc_endpoints = 
(volatile mca_btl_base_endpoint_t**) malloc(ib_proc->proc_port_count * From 8da460558997c42f5c32ee14bd24c4a33e52c40e Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Wed, 12 Dec 2018 14:53:11 +0900 Subject: [PATCH 03/94] btl/openib: immediately release the device when no port is allowed Many thanks to Sergey Oblomov for reporting this issue and the countless traces provided when troubleshooting it. This is a one-off commit for the v4.0.x branch since btl/openib has been removed from master. Refs. open-mpi/ompi#6137 Signed-off-by: Gilles Gouaillardet --- opal/mca/btl/openib/btl_openib.c | 10 +++++----- opal/mca/btl/openib/btl_openib.h | 2 ++ opal/mca/btl/openib/btl_openib_component.c | 14 +++++++++++--- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/opal/mca/btl/openib/btl_openib.c b/opal/mca/btl/openib/btl_openib.c index 3bd5fe965da..c2686a0676a 100644 --- a/opal/mca/btl/openib/btl_openib.c +++ b/opal/mca/btl/openib/btl_openib.c @@ -1045,7 +1045,7 @@ int mca_btl_openib_add_procs( opal_bitmap_clear_all_bits(reachable); opal_show_help("help-mpi-btl-openib.txt", "ib port not selected", true, opal_process_info.nodename, - ibv_get_device_name(openib_btl->device->ib_dev), openib_btl->port_num); + openib_btl->device_name, openib_btl->port_num); return OPAL_SUCCESS; } @@ -1718,11 +1718,11 @@ static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl) free(openib_btl->cpcs[i]); } free(openib_btl->cpcs); - } - /* Release device if there are no more users */ - if(!(--openib_btl->device->btls)) { - OBJ_RELEASE(openib_btl->device); + /* Release device if there are no more users */ + if(!(--openib_btl->device->allowed_btls)) { + OBJ_RELEASE(openib_btl->device); + } } if (NULL != openib_btl->qps) { diff --git a/opal/mca/btl/openib/btl_openib.h b/opal/mca/btl/openib/btl_openib.h index a5817a8daee..0b85bfb5662 100644 --- a/opal/mca/btl/openib/btl_openib.h +++ b/opal/mca/btl/openib/btl_openib.h @@ -392,6 +392,7 @@ typedef struct 
mca_btl_openib_device_t { /* Whether this device supports eager RDMA */ uint8_t use_eager_rdma; uint8_t btls; /** < number of btls using this device */ + uint8_t allowed_btls; /** < number of allowed btls using this device */ opal_pointer_array_t *endpoints; opal_pointer_array_t *device_btls; uint16_t hp_cq_polls; @@ -483,6 +484,7 @@ struct mca_btl_openib_module_t { uint8_t num_cpcs; mca_btl_openib_device_t *device; + char * device_name; uint8_t port_num; /**< ID of the PORT */ uint16_t pkey_index; struct ibv_port_attr ib_port_attr; diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c index 4a714b4d1b3..fcc0ac56973 100644 --- a/opal/mca/btl/openib/btl_openib_component.c +++ b/opal/mca/btl/openib/btl_openib_component.c @@ -648,9 +648,10 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, sizeof(mca_btl_openib_module)); ib_selected = OBJ_NEW(mca_btl_base_selected_module_t); ib_selected->btl_module = (mca_btl_base_module_t*) openib_btl; - openib_btl->device = device; openib_btl->port_num = (uint8_t) port_num; openib_btl->allowed = false; + openib_btl->device = NULL; + openib_btl->device_name = strdup(ibv_get_device_name(device->ib_dev)); OBJ_CONSTRUCT(&openib_btl->ib_lock, opal_mutex_t); opal_list_append(btl_list, (opal_list_item_t*) ib_selected); opal_pointer_array_add(device->device_btls, (void*) openib_btl); @@ -784,6 +785,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, ib_selected = OBJ_NEW(mca_btl_base_selected_module_t); ib_selected->btl_module = (mca_btl_base_module_t*) openib_btl; openib_btl->device = device; + openib_btl->device_name = NULL; openib_btl->port_num = (uint8_t) port_num; openib_btl->pkey_index = pkey_index; openib_btl->lid = lid; @@ -904,6 +906,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, opal_list_append(btl_list, (opal_list_item_t*) ib_selected); 
opal_pointer_array_add(device->device_btls, (void*) openib_btl); ++device->btls; + ++device->allowed_btls; ++mca_btl_openib_component.ib_num_btls; ++mca_btl_openib_component.ib_allowed_btls; if (-1 != mca_btl_openib_component.ib_max_btls && @@ -1933,7 +1936,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) if (ib_port_attr.active_mtu < device->mtu){ device->mtu = ib_port_attr.active_mtu; } - if (mca_btl_openib_component.apm_ports && device->btls > 0) { + if (mca_btl_openib_component.apm_ports && device->allowed_btls > 0) { init_apm_port(device, i, ib_port_attr.lid); break; } @@ -1969,7 +1972,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) /* If we made a BTL, check APM status and return. Otherwise, fall through and destroy everything */ - if (device->btls > 0) { + if (device->allowed_btls > 0) { /* if apm was enabled it should be > 1 */ if (1 == mca_btl_openib_component.apm_ports) { opal_show_help("help-mpi-btl-openib.txt", @@ -2290,6 +2293,11 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev) good: mca_btl_openib_component.devices_count++; return OPAL_SUCCESS; + } else if (device->btls > 0) { + /* no port is allowed to be used by btl/openib, + * so release the device right away */ + OBJ_RELEASE(device); + return OPAL_SUCCESS; } error: From c44821aef5cb7b8d287961aae8e65e00318c9460 Mon Sep 17 00:00:00 2001 From: Brelle Emmanuel Date: Mon, 1 Apr 2019 18:45:05 +0200 Subject: [PATCH 04/94] pml/ob1: fixed local handle sent during PUT control message In case of using a btl_put in ob1, the handle of the locally registered memory is sent with a PUT control message. In the current master code the sent handle is necessary the handle in the frag but if the handle has been successfully registered in the request, the frag structure does not have any valid handle and all fragments use the request one. 
I suggest to check if the handle in the fragment is valid and if not to send the handle from the request. Signed-off-by: Brelle Emmanuel (cherry picked from commit e630046a4b82bc01379fb055af4c0e414c2a8e8f) --- ompi/mca/pml/ob1/pml_ob1_recvreq.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index 1c95445ab46..02d2a58479a 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -402,6 +402,7 @@ static int mca_pml_ob1_recv_request_put_frag (mca_pml_ob1_rdma_frag_t *frag) #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc; #endif + mca_btl_base_registration_handle_t *local_handle = NULL; mca_bml_base_btl_t *bml_btl = frag->rdma_bml; mca_btl_base_descriptor_t *ctl; mca_pml_ob1_rdma_hdr_t *hdr; @@ -410,6 +411,12 @@ static int mca_pml_ob1_recv_request_put_frag (mca_pml_ob1_rdma_frag_t *frag) reg_size = bml_btl->btl->btl_registration_handle_size; + if (frag->local_handle) { + local_handle = frag->local_handle; + } else if (recvreq->local_handle) { + local_handle = recvreq->local_handle; + } + /* prepare a descriptor for rdma control message */ mca_bml_base_alloc (bml_btl, &ctl, MCA_BTL_NO_ORDER, sizeof (mca_pml_ob1_rdma_hdr_t) + reg_size, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | @@ -423,7 +430,7 @@ static int mca_pml_ob1_recv_request_put_frag (mca_pml_ob1_rdma_frag_t *frag) hdr = (mca_pml_ob1_rdma_hdr_t *) ctl->des_segments->seg_addr.pval; mca_pml_ob1_rdma_hdr_prepare (hdr, (!recvreq->req_ack_sent) ? 
MCA_PML_OB1_HDR_TYPE_ACK : 0, recvreq->remote_req_send.lval, frag, recvreq, frag->rdma_offset, - frag->local_address, frag->rdma_length, frag->local_handle, + frag->local_address, frag->rdma_length, local_handle, reg_size); ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_PUT, proc); From 48f824327c7cb2172498618a43d2518f1b650fb6 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Fri, 26 Apr 2019 19:44:18 -0400 Subject: [PATCH 05/94] Fix the leak of fragments for persistent sends. The rdma_frag attached to the send request was not correctly released upon request completion, leaking until MPI_Finalize. A quick solution would have been to add RDMA_FRAG_RETURN at different locations on the send request completion, but it would have unnecessarily made the sendreq completion path more complex. Instead, I added the length to the RDMA fragment so that it can be completed during the remote ack. Be more explicit on the comment. The rdma_frag can only be freed once when the peer forced a protocol change (from RDMA GET to send/recv). Otherwise the fragment will be returned once all data pertaining to it has been transferred. NOTE: Had to add a typedef for "opal_atomic_size_t" from master into opal/threads/thread_usage.h into this cherry pick (it is in opal/include/opal_stdatomic.h on master, but that file does not exist here on the v4.0.x branch).
Signed-off-by: George Bosilca (cherry picked from commit a16cf0e4dd6df4dea820fecedd5920df632935b8) Signed-off-by: Jeff Squyres --- ompi/mca/pml/ob1/pml_ob1_rdmafrag.h | 6 +++--- ompi/mca/pml/ob1/pml_ob1_recvfrag.c | 4 ---- ompi/mca/pml/ob1/pml_ob1_recvreq.c | 10 +++++++--- ompi/mca/pml/ob1/pml_ob1_sendreq.c | 19 ++++++++++++------- ompi/mca/pml/ob1/pml_ob1_sendreq.h | 5 +---- opal/threads/thread_usage.h | 4 ++++ 6 files changed, 27 insertions(+), 21 deletions(-) diff --git a/ompi/mca/pml/ob1/pml_ob1_rdmafrag.h b/ompi/mca/pml/ob1/pml_ob1_rdmafrag.h index 70a390d8073..176c830974c 100644 --- a/ompi/mca/pml/ob1/pml_ob1_rdmafrag.h +++ b/ompi/mca/pml/ob1/pml_ob1_rdmafrag.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -46,7 +46,8 @@ struct mca_pml_ob1_rdma_frag_t { mca_bml_base_btl_t *rdma_bml; mca_pml_ob1_hdr_t rdma_hdr; mca_pml_ob1_rdma_state_t rdma_state; - size_t rdma_length; + size_t rdma_length; /* how much the fragment will transfer */ + opal_atomic_size_t rdma_bytes_remaining; /* how much is left to be transferred */ void *rdma_req; uint32_t retries; mca_pml_ob1_rdma_frag_callback_t cbfunc; @@ -71,7 +72,6 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_rdma_frag_t); #define MCA_PML_OB1_RDMA_FRAG_RETURN(frag) \ do { \ - /* return fragment */ \ if (frag->local_handle) { \ mca_bml_base_deregister_mem (frag->rdma_bml, frag->local_handle); \ frag->local_handle = NULL; \ diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index c960ac3e10d..66482b4bc62 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -558,10 +558,6 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl, * then throttle sends */ if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NORDMA) { if (NULL != sendreq->rdma_frag) { - if (NULL != sendreq->rdma_frag->local_handle) { - mca_bml_base_deregister_mem (sendreq->req_rdma[0].bml_btl, sendreq->rdma_frag->local_handle); - sendreq->rdma_frag->local_handle = NULL; - } MCA_PML_OB1_RDMA_FRAG_RETURN(sendreq->rdma_frag); sendreq->rdma_frag = NULL; } diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index 02d2a58479a..70969415c49 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2018 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -313,7 +313,12 @@ static int mca_pml_ob1_recv_request_ack( return OMPI_SUCCESS; } - /* let know to shedule function there is no need to put ACK flag */ + /* let know to shedule function there is no need to put ACK flag. If not all message went over + * RDMA then we cancel the GET protocol in order to switch back to send/recv. In this case send + * back the remote send request, the peer kept a poointer to the frag locally. In the future we + * might want to cancel the fragment itself, in which case we will have to send back the remote + * fragment instead of the remote request. + */ recvreq->req_ack_sent = true; return mca_pml_ob1_recv_request_ack_send(proc, hdr->hdr_src_req.lval, recvreq, recvreq->req_send_offset, 0, @@ -652,7 +657,6 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq int rc; prev_sent = offset = 0; - bytes_remaining = hdr->hdr_rndv.hdr_msg_length; recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length; recvreq->req_send_offset = 0; recvreq->req_rdma_offset = 0; diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index 1626e13e353..2474374572d 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2018 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -41,7 +41,6 @@ #include "ompi/mca/bml/base/base.h" #include "ompi/memchecker.h" - OBJ_CLASS_INSTANCE(mca_pml_ob1_send_range_t, opal_free_list_item_t, NULL, NULL); @@ -148,10 +147,7 @@ static void mca_pml_ob1_send_request_destruct(mca_pml_ob1_send_request_t* req) { OBJ_DESTRUCT(&req->req_send_ranges); OBJ_DESTRUCT(&req->req_send_range_lock); - if (req->rdma_frag) { - MCA_PML_OB1_RDMA_FRAG_RETURN(req->rdma_frag); - req->rdma_frag = NULL; - } + assert( NULL == req->rdma_frag ); } OBJ_CLASS_INSTANCE( mca_pml_ob1_send_request_t, @@ -262,12 +258,20 @@ mca_pml_ob1_rget_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t rdma_length) { mca_pml_ob1_send_request_t *sendreq = (mca_pml_ob1_send_request_t *) frag->rdma_req; mca_bml_base_btl_t *bml_btl = frag->rdma_bml; + size_t frag_remaining; /* count bytes of user data actually delivered and check for request completion */ if (OPAL_LIKELY(0 < rdma_length)) { - OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, (size_t) rdma_length); + frag_remaining = OPAL_THREAD_SUB_FETCH_SIZE_T(&frag->rdma_bytes_remaining, (size_t)rdma_length); SPC_USER_OR_MPI(sendreq->req_send.req_base.req_ompi.req_status.MPI_TAG, (ompi_spc_value_t)rdma_length, OMPI_SPC_BYTES_SENT_USER, OMPI_SPC_BYTES_SENT_MPI); + + if( 0 == frag_remaining ) { /* this frag is now completed. Update the request and be done */ + OPAL_THREAD_ADD_FETCH_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length); + if( sendreq->rdma_frag == frag ) + sendreq->rdma_frag = NULL; + MCA_PML_OB1_RDMA_FRAG_RETURN(frag); + } } send_request_pml_complete_check(sendreq); @@ -701,6 +705,7 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq, frag->rdma_req = sendreq; frag->rdma_bml = bml_btl; frag->rdma_length = size; + frag->rdma_bytes_remaining = size; frag->cbfunc = mca_pml_ob1_rget_completion; /* do not store the local handle in the fragment. 
it will be released by mca_pml_ob1_free_rdma_resources */ diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h index 06e4abb4672..ae8f5afe2c5 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h @@ -216,10 +216,7 @@ static inline void mca_pml_ob1_send_request_fini (mca_pml_ob1_send_request_t *se { /* Let the base handle the reference counts */ MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send)); - if (sendreq->rdma_frag) { - MCA_PML_OB1_RDMA_FRAG_RETURN (sendreq->rdma_frag); - sendreq->rdma_frag = NULL; - } + assert( NULL == sendreq->rdma_frag ); } /* diff --git a/opal/threads/thread_usage.h b/opal/threads/thread_usage.h index 178c8ceaab6..434c69e88df 100644 --- a/opal/threads/thread_usage.h +++ b/opal/threads/thread_usage.h @@ -88,6 +88,10 @@ static inline bool opal_set_using_threads(bool have) } +// Back-ported from master (2019-05-04) as part of +// a16cf0e4dd6df4dea820fecedd5920df632935b8 +typedef volatile size_t opal_atomic_size_t; + /** * Use an atomic operation for increment/decrement if opal_using_threads() * indicates that threads are in use by the application or library. From 8f82c899bcc0dc86c5347f438ac3fb94c65a64cf Mon Sep 17 00:00:00 2001 From: Valentin Petrov Date: Mon, 20 May 2019 11:17:44 +0300 Subject: [PATCH 06/94] Coll/hcoll: don't init opal memhooks unless explicitely requested by user If user sets HCOLL_EXTERNAL_UCM_EVENTS=1 then we try init opal memory framework and register a mem release cb. Otherwise, rely on ucx. 
Signed-off-by: Valentin Petrov --- ompi/mca/coll/hcoll/coll_hcoll_component.c | 19 ------------------- ompi/mca/coll/hcoll/coll_hcoll_module.c | 21 ++++++++++++++++----- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/ompi/mca/coll/hcoll/coll_hcoll_component.c b/ompi/mca/coll/hcoll/coll_hcoll_component.c index 29ea5689c73..a7a79286a3f 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_component.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_component.c @@ -209,29 +209,10 @@ static int hcoll_open(void) { mca_coll_hcoll_component_t *cm; cm = &mca_coll_hcoll_component; - mca_coll_hcoll_output = opal_output_open(NULL); opal_output_set_verbosity(mca_coll_hcoll_output, cm->hcoll_verbose); - hcoll_rte_fns_setup(); - cm->libhcoll_initialized = false; - - (void)mca_base_framework_open(&opal_memory_base_framework, 0); - - /* Register memory hooks */ - if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == - ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & - opal_mem_hooks_support_level())) - { - setenv("MXM_HCOLL_MEM_ON_DEMAND_MAP", "y", 0); - HCOL_VERBOSE(1, "Enabling on-demand memory mapping"); - cm->using_mem_hooks = 1; - } else { - HCOL_VERBOSE(1, "Disabling on-demand memory mapping"); - cm->using_mem_hooks = 0; - } - return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/hcoll/coll_hcoll_module.c b/ompi/mca/coll/hcoll/coll_hcoll_module.c index 6e2fbdda310..aa262c98492 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_module.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_module.c @@ -301,17 +301,28 @@ mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority) HCOL_ERROR("Hcol library init failed"); return NULL; } - #if HCOLL_API >= HCOLL_VERSION(3,2) - if (cm->using_mem_hooks && cm->init_opts->mem_hook_needed) { + if (cm->init_opts->mem_hook_needed) { #else - if (cm->using_mem_hooks && hcoll_check_mem_release_cb_needed()) { + if (hcoll_check_mem_release_cb_needed()) { #endif - opal_mem_hooks_register_release(mca_coll_hcoll_mem_release_cb, NULL); + rc = 
mca_base_framework_open(&opal_memory_base_framework, 0); + if (OPAL_SUCCESS != rc) { + HCOL_VERBOSE(1, "failed to initialize memory base framework: %d, " + "memory hooks will not be used", rc); + } else { + if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == + ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & + opal_mem_hooks_support_level())) { + HCOL_VERBOSE(1, "using OPAL memory hooks as external events"); + cm->using_mem_hooks = 1; + opal_mem_hooks_register_release(mca_coll_hcoll_mem_release_cb, NULL); + setenv("MXM_HCOLL_MEM_ON_DEMAND_MAP", "y", 0); + } + } } else { cm->using_mem_hooks = 0; } - copy_fn.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function*) MPI_COMM_NULL_COPY_FN; del_fn.attr_communicator_delete_fn = hcoll_comm_attr_del_fn; err = ompi_attr_create_keyval(COMM_ATTR, copy_fn, del_fn, &hcoll_comm_attr_keyval, NULL ,0, NULL); From 11cb0f24a51cc971db13ce12578c1f6ff4932281 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 15 May 2019 12:24:22 -0600 Subject: [PATCH 07/94] btl/uct: check for support before disabling UCX memory hooks Signed-off-by: Nathan Hjelm (cherry picked from commit 3e1dd362411f1da5564d3402f65e9b3b74f50759) --- opal/mca/btl/uct/btl_uct_component.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/opal/mca/btl/uct/btl_uct_component.c b/opal/mca/btl/uct/btl_uct_component.c index c8bc9e93775..f968cb9c31c 100644 --- a/opal/mca/btl/uct/btl_uct_component.c +++ b/opal/mca/btl/uct/btl_uct_component.c @@ -17,6 +17,7 @@ * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2018 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2019 Google, LLC. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -127,7 +128,10 @@ static int mca_btl_uct_component_open(void) mca_btl_uct_component.num_contexts_per_module = MCA_BTL_UCT_MAX_WORKERS; } - if (mca_btl_uct_component.disable_ucx_memory_hooks) { + if (mca_btl_uct_component.disable_ucx_memory_hooks && + ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == + ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & + opal_mem_hooks_support_level()))) { ucm_set_external_event(UCM_EVENT_VM_UNMAPPED); opal_mem_hooks_register_release(mca_btl_uct_mem_release_cb, NULL); } From c14260556662243a739fa6604091925f221e1daf Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Tue, 21 May 2019 11:42:10 +0300 Subject: [PATCH 08/94] SSHMEM/COLL: added sshmem/mpi implementation for shmem_collect call - added MPI based implementation of shmem_collect call Signed-off-by: Sergey Oblomov (cherry picked from commit 7d8cb75b2e344f867021cffd4e656ff08d3945d8) --- oshmem/mca/scoll/mpi/scoll_mpi_ops.c | 63 ++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 13 deletions(-) diff --git a/oshmem/mca/scoll/mpi/scoll_mpi_ops.c b/oshmem/mca/scoll/mpi/scoll_mpi_ops.c index eb03dfec2df..2aa87a0222d 100644 --- a/oshmem/mca/scoll/mpi/scoll_mpi_ops.c +++ b/oshmem/mca/scoll/mpi/scoll_mpi_ops.c @@ -107,16 +107,18 @@ int mca_scoll_mpi_collect(struct oshmem_group_t *group, bool nlong_type, int alg) { + ompi_datatype_t* stype = &ompi_mpi_char.dt; + ompi_datatype_t* rtype = &ompi_mpi_char.dt; mca_scoll_mpi_module_t *mpi_module; - ompi_datatype_t* stype; - ompi_datatype_t* rtype; int rc; + int len; + int i; void *sbuf, *rbuf; + int *disps, *recvcounts; MPI_COLL_VERBOSE(20,"RUNNING MPI ALLGATHER"); mpi_module = (mca_scoll_mpi_module_t *) group->g_scoll.scoll_collect_module; if (nlong_type == true) { - /* Do nothing on zero-length request */ if (OPAL_UNLIKELY(!nlong)) { return OSHMEM_SUCCESS; @@ -124,8 +126,6 @@ int mca_scoll_mpi_collect(struct oshmem_group_t *group, sbuf = (void *) source; 
rbuf = target; - stype = &ompi_mpi_char.dt; - rtype = &ompi_mpi_char.dt; /* Open SHMEM specification has the following constrains (page 85): * "If using C/C++, nelems must be of type integer. If you are using Fortran, it must be a * default integer value". And also fortran signature says "INTEGER". @@ -159,15 +159,52 @@ int mca_scoll_mpi_collect(struct oshmem_group_t *group, SCOLL_DEFAULT_ALG); } } else { - MPI_COLL_VERBOSE(20,"RUNNING FALLBACK COLLECT"); - PREVIOUS_SCOLL_FN(mpi_module, collect, group, - target, - source, - nlong, - pSync, - nlong_type, - SCOLL_DEFAULT_ALG); + if (INT_MAX < nlong) { + MPI_COLL_VERBOSE(20,"RUNNING FALLBACK COLLECT"); + PREVIOUS_SCOLL_FN(mpi_module, collect, group, + target, + source, + nlong, + pSync, + nlong_type, + SCOLL_DEFAULT_ALG); + return rc; + } + + len = nlong; + disps = malloc(group->proc_count * sizeof(*disps)); + if (disps == NULL) { + rc = OSHMEM_ERR_OUT_OF_RESOURCE; + goto complete; + } + + recvcounts = malloc(group->proc_count * sizeof(*recvcounts)); + if (recvcounts == NULL) { + rc = OSHMEM_ERR_OUT_OF_RESOURCE; + goto failed_mem; + } + + rc = mpi_module->comm->c_coll->coll_allgather(&len, sizeof(len), stype, recvcounts, + sizeof(len), rtype, mpi_module->comm, + mpi_module->comm->c_coll->coll_allgather_module); + if (rc != OSHMEM_SUCCESS) { + goto failed_allgather; + } + + disps[0] = 0; + for (i = 1; i < group->proc_count; i++) { + disps[i] = disps[i - 1] + recvcounts[i - 1]; + } + + rc = mpi_module->comm->c_coll->coll_allgatherv(source, nlong, stype, target, recvcounts, + disps, rtype, mpi_module->comm, + mpi_module->comm->c_coll->coll_allgatherv_module); +failed_allgather: + free(recvcounts); +failed_mem: + free(disps); } +complete: return rc; } From f75d46faa9f436a2c2c4a1e71ac15d67156af064 Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Wed, 15 May 2019 19:33:36 +0300 Subject: [PATCH 09/94] ALLOC_WITH_HINT: added implace realloc - in some cases realloc operation may be completed without allocation of new buffer 
(and without additional data copy) - added logic to reallocate buffer inplace if possible Signed-off-by: Sergey Oblomov (cherry picked from commit 277c2a9e5c7711098be826e6c154253747fdad9a) --- oshmem/mca/spml/ucx/spml_ucx.c | 3 +- oshmem/mca/sshmem/ucx/sshmem_ucx.h | 13 ++++- oshmem/mca/sshmem/ucx/sshmem_ucx_module.c | 20 ++++--- oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c | 64 ++++++++++++++++++++++- 4 files changed, 88 insertions(+), 12 deletions(-) diff --git a/oshmem/mca/spml/ucx/spml_ucx.c b/oshmem/mca/spml/ucx/spml_ucx.c index a4d81b13182..fa79adafb44 100644 --- a/oshmem/mca/spml/ucx/spml_ucx.c +++ b/oshmem/mca/spml/ucx/spml_ucx.c @@ -554,7 +554,8 @@ static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx { ucp_worker_params_t params; ucp_ep_params_t ep_params; - size_t i, j, nprocs = oshmem_num_procs(); + size_t i, nprocs = oshmem_num_procs(); + int j; ucs_status_t err; spml_ucx_mkey_t *ucx_mkey; sshmem_mkey_t *mkey; diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx.h b/oshmem/mca/sshmem/ucx/sshmem_ucx.h index f171fe641b8..3d6bba7018a 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx.h +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx.h @@ -49,10 +49,19 @@ sshmem_ucx_shadow_allocator_t *sshmem_ucx_shadow_create(unsigned count); void sshmem_ucx_shadow_destroy(sshmem_ucx_shadow_allocator_t *allocator); int sshmem_ucx_shadow_alloc(sshmem_ucx_shadow_allocator_t *allocator, unsigned count, unsigned *index); + +/* reallocate existing allocated buffer. if possible - used inplace + * reallocation. 
+ * parameter 'inplace' - out, in case if zero - new buffer was allocated + * (inplace is not possible), user should remove original buffer after data + * is copied, else (if inplace == 0) - no additional action required */ +int sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator, + unsigned count, unsigned old_index, unsigned *index, + int *inplace); int sshmem_ucx_shadow_free(sshmem_ucx_shadow_allocator_t *allocator, unsigned index); -size_t sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator, - unsigned index); +unsigned sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator, + unsigned index); END_C_DECLS diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c b/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c index 047343e9c10..d6895875b44 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c @@ -189,7 +189,6 @@ static uct_ib_device_mem_h alloc_device_mem(mca_spml_ucx_t *spml, size_t size, uct_md_h uct_md; void *address; size_t length; - int ret; uct_md = ucp_context_find_tl_md(spml->ucp_context, "mlx5"); if (uct_md == NULL) { @@ -353,8 +352,9 @@ static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size, void* old_ptr, void** new_ptr) { mca_sshmem_ucx_segment_context_t *ctx = s->context; - unsigned alloc_count, index; + unsigned alloc_count, index, old_index, old_alloc_count; int res; + int inplace; if (size > s->seg_size) { return OSHMEM_ERR_OUT_OF_RESOURCE; @@ -371,7 +371,15 @@ static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size, /* Allocate new element. 
Zero-size allocation should still return a unique * pointer, so allocate 1 byte */ alloc_count = max((size + ALLOC_ELEM_SIZE - 1) / ALLOC_ELEM_SIZE, 1); - res = sshmem_ucx_shadow_alloc(ctx->shadow_allocator, alloc_count, &index); + + if (!old_ptr) { + res = sshmem_ucx_shadow_alloc(ctx->shadow_allocator, alloc_count, &index); + } else { + old_index = sshmem_ucx_memheap_ptr2index(s, old_ptr); + res = sshmem_ucx_shadow_realloc(ctx->shadow_allocator, alloc_count, + old_index, &index, &inplace); + } + if (res != OSHMEM_SUCCESS) { return res; } @@ -379,10 +387,8 @@ static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size, *new_ptr = sshmem_ucx_memheap_index2ptr(s, index); /* Copy to new segment and release old*/ - if (old_ptr) { - unsigned old_index = sshmem_ucx_memheap_ptr2index(s, old_ptr); - unsigned old_alloc_count = sshmem_ucx_shadow_size(ctx->shadow_allocator, - old_index); + if (old_ptr && !inplace) { + old_alloc_count = sshmem_ucx_shadow_size(ctx->shadow_allocator, old_index); sshmem_ucx_memheap_wordcopy(*new_ptr, old_ptr, min(size, old_alloc_count * ALLOC_ELEM_SIZE)); sshmem_ucx_shadow_free(ctx->shadow_allocator, old_index); diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c b/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c index 92fa2bb0cfc..d5a25eaf154 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c @@ -107,6 +107,66 @@ static void sshmem_ucx_shadow_merge_blocks(sshmem_ucx_shadow_allocator_t *alloca } } + + +int sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator, + unsigned count, unsigned old_index, unsigned *index, + int *inplace) +{ + sshmem_ucx_shadow_alloc_elem_t *end = &allocator->elems[allocator->num_elems]; + sshmem_ucx_shadow_alloc_elem_t *elem = &allocator->elems[old_index]; + sshmem_ucx_shadow_alloc_elem_t *next = &elem[elem->block_size]; + unsigned old_count = elem->block_size; + + assert(count > 0); + assert(!sshmem_ucx_shadow_is_free(elem)); + + *inplace = 1; + + if 
(count == old_count) { + *index = old_index; + return OSHMEM_SUCCESS; + } + + if (count < elem->block_size) { + /* requested block is shorter than allocated block + * then just cut current buffer */ + sshmem_ucx_shadow_set_elem(elem + count, + SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE, + elem->block_size - count); + elem->block_size = count; + *index = old_index; + sshmem_ucx_shadow_merge_blocks(allocator); + return OSHMEM_SUCCESS; + } + + assert(count > old_count); + + /* try to check if next element is free & has enough length */ + if ((next < end) && /* non-last element? */ + sshmem_ucx_shadow_is_free(next) && /* next is free */ + (old_count + next->block_size >= count)) + { + assert(elem < next); + assert(elem + count > next); + assert(elem + count <= end); + assert(next + next->block_size <= end); + + if (old_count + next->block_size > count) { + sshmem_ucx_shadow_set_elem(elem + count, SSHMEM_UCX_SHADOW_ELEM_FLAG_FREE, + old_count + next->block_size - count); + } + + sshmem_ucx_shadow_set_elem(next, 0, 0); + elem->block_size = count; + *index = old_index; + return OSHMEM_SUCCESS; + } + + *inplace = 0; + return sshmem_ucx_shadow_alloc(allocator, count, index); +} + int sshmem_ucx_shadow_free(sshmem_ucx_shadow_allocator_t *allocator, unsigned index) { @@ -117,8 +177,8 @@ int sshmem_ucx_shadow_free(sshmem_ucx_shadow_allocator_t *allocator, return OSHMEM_SUCCESS; } -size_t sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator, - unsigned index) +unsigned sshmem_ucx_shadow_size(sshmem_ucx_shadow_allocator_t *allocator, + unsigned index) { sshmem_ucx_shadow_alloc_elem_t *elem = &allocator->elems[index]; From 748a5f5e73b6a00ea60d64a7cf8e8b4f9a202126 Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Thu, 16 May 2019 09:38:01 +0300 Subject: [PATCH 10/94] SHADOW ALLOCATOR: minor code optimization Signed-off-by: Sergey Oblomov (cherry picked from commit a51badd627c5cdd3212cd6bedd3daa236cd6c8db) --- oshmem/mca/sshmem/ucx/sshmem_ucx.h | 4 ++-- 
oshmem/mca/sshmem/ucx/sshmem_ucx_module.c | 2 +- oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx.h b/oshmem/mca/sshmem/ucx/sshmem_ucx.h index 3d6bba7018a..fa264b40f42 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx.h +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx.h @@ -50,9 +50,9 @@ void sshmem_ucx_shadow_destroy(sshmem_ucx_shadow_allocator_t *allocator); int sshmem_ucx_shadow_alloc(sshmem_ucx_shadow_allocator_t *allocator, unsigned count, unsigned *index); -/* reallocate existing allocated buffer. if possible - used inplace +/* Reallocate existing allocated buffer. If possible - used inplace * reallocation. - * parameter 'inplace' - out, in case if zero - new buffer was allocated + * Parameter 'inplace' - out, in case if zero - new buffer was allocated * (inplace is not possible), user should remove original buffer after data * is copied, else (if inplace == 0) - no additional action required */ int sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator, diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c b/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c index d6895875b44..52b4d560626 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c @@ -335,7 +335,7 @@ static unsigned sshmem_ucx_memheap_ptr2index(map_segment_t *s, void *ptr) return ((char*)ptr - (char*)s->super.va_base) / ALLOC_ELEM_SIZE; } -void sshmem_ucx_memheap_wordcopy(void *dst, void *src, size_t size) +static void sshmem_ucx_memheap_wordcopy(void *dst, void *src, size_t size) { const size_t count = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t); uint64_t *dst64 = (uint64_t*)dst; diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c b/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c index d5a25eaf154..9aaf77772f8 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c @@ -128,7 +128,7 @@ int 
sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator, return OSHMEM_SUCCESS; } - if (count < elem->block_size) { + if (count < old_count) { /* requested block is shorter than allocated block * then just cut current buffer */ sshmem_ucx_shadow_set_elem(elem + count, From 456c5b90aea606838ce06a2496081807f92eca40 Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Fri, 24 May 2019 09:16:56 +0300 Subject: [PATCH 11/94] OSHMEM: minor optimization of realloc in shadow allocator Signed-off-by: Sergey Oblomov (cherry picked from commit d6a09120244be36d870e791146b5baed93659754) --- oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c b/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c index 9aaf77772f8..06922c3e1b7 100644 --- a/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c +++ b/oshmem/mca/sshmem/ucx/sshmem_ucx_shadow.c @@ -113,10 +113,10 @@ int sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator, unsigned count, unsigned old_index, unsigned *index, int *inplace) { - sshmem_ucx_shadow_alloc_elem_t *end = &allocator->elems[allocator->num_elems]; sshmem_ucx_shadow_alloc_elem_t *elem = &allocator->elems[old_index]; - sshmem_ucx_shadow_alloc_elem_t *next = &elem[elem->block_size]; unsigned old_count = elem->block_size; + sshmem_ucx_shadow_alloc_elem_t *end; + sshmem_ucx_shadow_alloc_elem_t *next; assert(count > 0); assert(!sshmem_ucx_shadow_is_free(elem)); @@ -142,8 +142,10 @@ int sshmem_ucx_shadow_realloc(sshmem_ucx_shadow_allocator_t *allocator, assert(count > old_count); + end = &allocator->elems[allocator->num_elems]; + next = &elem[old_count]; /* try to check if next element is free & has enough length */ - if ((next < end) && /* non-last element? */ + if ((next < end) && /* non-last element? 
*/ sshmem_ucx_shadow_is_free(next) && /* next is free */ (old_count + next->block_size >= count)) { From 69923e78c71febad0a8dd64230742b77c6f6ec35 Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Mon, 20 May 2019 15:17:30 +0300 Subject: [PATCH 12/94] SPML/UCX: added synchronized flush on quiet - added synchronized flush operation on quiet call. - flush is implemented using get operation Signed-off-by: Sergey Oblomov (cherry picked from commit 0b108411f89727a68cd622f3b04c783efa359b8e) --- oshmem/mca/atomic/ucx/atomic_ucx_cswap.c | 5 ++ oshmem/mca/atomic/ucx/atomic_ucx_module.c | 5 ++ oshmem/mca/spml/ucx/spml_ucx.c | 85 ++++++++++++++++++++++- oshmem/mca/spml/ucx/spml_ucx.h | 19 ++++- oshmem/mca/spml/ucx/spml_ucx_component.c | 5 ++ 5 files changed, 115 insertions(+), 4 deletions(-) diff --git a/oshmem/mca/atomic/ucx/atomic_ucx_cswap.c b/oshmem/mca/atomic/ucx/atomic_ucx_cswap.c index 51b07629471..8c5fa1d1a64 100644 --- a/oshmem/mca/atomic/ucx/atomic_ucx_cswap.c +++ b/oshmem/mca/atomic/ucx/atomic_ucx_cswap.c @@ -45,6 +45,11 @@ int mca_atomic_ucx_cswap(shmem_ctx_t ctx, UCP_ATOMIC_FETCH_OP_CSWAP, cond, prev, size, rva, ucx_mkey->rkey, opal_common_ucx_empty_complete_cb); + + if (OPAL_LIKELY(!UCS_PTR_IS_ERR(status_ptr))) { + mca_spml_ucx_remote_op_posted(ucx_ctx, pe); + } + return opal_common_ucx_wait_request(status_ptr, ucx_ctx->ucp_worker, "ucp_atomic_fetch_nb"); } diff --git a/oshmem/mca/atomic/ucx/atomic_ucx_module.c b/oshmem/mca/atomic/ucx/atomic_ucx_module.c index 91d4551e457..882b83f6520 100644 --- a/oshmem/mca/atomic/ucx/atomic_ucx_module.c +++ b/oshmem/mca/atomic/ucx/atomic_ucx_module.c @@ -51,6 +51,11 @@ int mca_atomic_ucx_op(shmem_ctx_t ctx, status = ucp_atomic_post(ucx_ctx->ucp_peers[pe].ucp_conn, op, value, size, rva, ucx_mkey->rkey); + + if (OPAL_LIKELY(UCS_OK == status)) { + mca_spml_ucx_remote_op_posted(ucx_ctx, pe); + } + return ucx_status_to_oshmem(status); } diff --git a/oshmem/mca/spml/ucx/spml_ucx.c b/oshmem/mca/spml/ucx/spml_ucx.c index 
a4d81b13182..9ed672524a6 100644 --- a/oshmem/mca/spml/ucx/spml_ucx.c +++ b/oshmem/mca/spml/ucx/spml_ucx.c @@ -80,7 +80,8 @@ mca_spml_ucx_t mca_spml_ucx = { .num_disconnect = 1, .heap_reg_nb = 0, .enabled = 0, - .get_mkey_slow = NULL + .get_mkey_slow = NULL, + .synchronized_quiet = false }; mca_spml_ucx_ctx_t mca_spml_ucx_ctx_default = { @@ -216,6 +217,40 @@ static void dump_address(int pe, char *addr, size_t len) static char spml_ucx_transport_ids[1] = { 0 }; +int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs) +{ + int res; + + if (mca_spml_ucx.synchronized_quiet) { + ctx->put_proc_indexes = malloc(nprocs * sizeof(*ctx->put_proc_indexes)); + if (NULL == ctx->put_proc_indexes) { + return OSHMEM_ERR_OUT_OF_RESOURCE; + } + + OBJ_CONSTRUCT(&ctx->put_op_bitmap, opal_bitmap_t); + res = opal_bitmap_init(&ctx->put_op_bitmap, nprocs); + if (OPAL_SUCCESS != res) { + free(ctx->put_proc_indexes); + ctx->put_proc_indexes = NULL; + return res; + } + + ctx->put_proc_count = 0; + } + + return OSHMEM_SUCCESS; +} + +int mca_spml_ucx_clear_put_op_mask(mca_spml_ucx_ctx_t *ctx) +{ + if (mca_spml_ucx.synchronized_quiet && ctx->put_proc_indexes) { + OBJ_DESTRUCT(&ctx->put_op_bitmap); + free(ctx->put_proc_indexes); + } + + return OSHMEM_SUCCESS; +} + int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) { size_t i, j, n; @@ -235,6 +270,11 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) goto error; } + rc = mca_spml_ucx_init_put_op_mask(&mca_spml_ucx_ctx_default, nprocs); + if (OSHMEM_SUCCESS != rc) { + goto error; + } + err = ucp_worker_get_address(mca_spml_ucx_ctx_default.ucp_worker, &wk_local_addr, &wk_addr_len); if (err != UCS_OK) { goto error; @@ -297,6 +337,8 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) free(mca_spml_ucx.remote_addrs_tbl[i]); } } + + mca_spml_ucx_clear_put_op_mask(&mca_spml_ucx_ctx_default); if (mca_spml_ucx_ctx_default.ucp_peers) free(mca_spml_ucx_ctx_default.ucp_peers); if 
(mca_spml_ucx.remote_addrs_tbl) @@ -583,6 +625,11 @@ static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx goto error; } + rc = mca_spml_ucx_init_put_op_mask(ucx_ctx, nprocs); + if (OSHMEM_SUCCESS != rc) { + goto error2; + } + for (i = 0; i < nprocs; i++) { ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; ep_params.address = (ucp_address_t *)(mca_spml_ucx.remote_addrs_tbl[i]); @@ -621,6 +668,8 @@ static int mca_spml_ucx_ctx_create_common(long options, mca_spml_ucx_ctx_t **ucx } } + mca_spml_ucx_clear_put_op_mask(ucx_ctx); + if (ucx_ctx->ucp_peers) free(ucx_ctx->ucp_peers); @@ -715,6 +764,7 @@ int mca_spml_ucx_put(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_add void *rva; spml_ucx_mkey_t *ucx_mkey; mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; + int res; #if HAVE_DECL_UCP_PUT_NB ucs_status_ptr_t request; #else @@ -725,12 +775,18 @@ int mca_spml_ucx_put(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_add #if HAVE_DECL_UCP_PUT_NB request = ucp_put_nb(ucx_ctx->ucp_peers[dst].ucp_conn, src_addr, size, (uint64_t)rva, ucx_mkey->rkey, opal_common_ucx_empty_complete_cb); - return opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker, "ucp_put_nb"); + res = opal_common_ucx_wait_request(request, ucx_ctx->ucp_worker, "ucp_put_nb"); #else status = ucp_put(ucx_ctx->ucp_peers[dst].ucp_conn, src_addr, size, (uint64_t)rva, ucx_mkey->rkey); - return ucx_status_to_oshmem(status); + res = ucx_status_to_oshmem(status); #endif + + if (OPAL_LIKELY(OSHMEM_SUCCESS == res)) { + mca_spml_ucx_remote_op_posted(ucx_ctx, dst); + } + + return res; } int mca_spml_ucx_put_nb(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_addr, int dst, void **handle) @@ -744,6 +800,10 @@ int mca_spml_ucx_put_nb(shmem_ctx_t ctx, void* dst_addr, size_t size, void* src_ status = ucp_put_nbi(ucx_ctx->ucp_peers[dst].ucp_conn, src_addr, size, (uint64_t)rva, ucx_mkey->rkey); + if (OPAL_LIKELY(status >= 0)) { + 
mca_spml_ucx_remote_op_posted(ucx_ctx, dst); + } + return ucx_status_to_oshmem_nb(status); } @@ -767,9 +827,28 @@ int mca_spml_ucx_fence(shmem_ctx_t ctx) int mca_spml_ucx_quiet(shmem_ctx_t ctx) { + int flush_get_data; int ret; + unsigned i; + int idx; mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx; + if (mca_spml_ucx.synchronized_quiet) { + for (i = 0; i < ucx_ctx->put_proc_count; i++) { + idx = ucx_ctx->put_proc_indexes[i]; + ret = mca_spml_ucx_get_nb(ctx, + ucx_ctx->ucp_peers[idx].mkeys->super.super.va_base, + sizeof(flush_get_data), &flush_get_data, idx, NULL); + if (OMPI_SUCCESS != ret) { + oshmem_shmem_abort(-1); + return ret; + } + + opal_bitmap_clear_bit(&ucx_ctx->put_op_bitmap, idx); + } + ucx_ctx->put_proc_count = 0; + } + opal_atomic_wmb(); ret = opal_common_ucx_worker_flush(ucx_ctx->ucp_worker); diff --git a/oshmem/mca/spml/ucx/spml_ucx.h b/oshmem/mca/spml/ucx/spml_ucx.h index f697f3564b6..95c56622351 100644 --- a/oshmem/mca/spml/ucx/spml_ucx.h +++ b/oshmem/mca/spml/ucx/spml_ucx.h @@ -33,6 +33,7 @@ #include "opal/class/opal_free_list.h" #include "opal/class/opal_list.h" +#include "opal/class/opal_bitmap.h" #include "orte/runtime/orte_globals.h" #include "opal/mca/common/ucx/common_ucx.h" @@ -70,6 +71,9 @@ struct mca_spml_ucx_ctx { ucp_worker_h ucp_worker; ucp_peer_t *ucp_peers; long options; + opal_bitmap_t put_op_bitmap; + int *put_proc_indexes; + unsigned put_proc_count; }; typedef struct mca_spml_ucx_ctx mca_spml_ucx_ctx_t; @@ -104,7 +108,7 @@ struct mca_spml_ucx { mca_spml_ucx_ctx_t *aux_ctx; pthread_spinlock_t async_lock; int aux_refcnt; - + bool synchronized_quiet; }; typedef struct mca_spml_ucx mca_spml_ucx_t; @@ -171,6 +175,9 @@ extern int spml_ucx_ctx_progress(void); extern int spml_ucx_progress_aux_ctx(void); void mca_spml_ucx_async_cb(int fd, short event, void *cbdata); +int mca_spml_ucx_init_put_op_mask(mca_spml_ucx_ctx_t *ctx, size_t nprocs); +int mca_spml_ucx_clear_put_op_mask(mca_spml_ucx_ctx_t *ctx); + static inline void 
mca_spml_ucx_aux_lock(void) { if (mca_spml_ucx.async_progress) { @@ -224,6 +231,16 @@ static inline int ucx_status_to_oshmem_nb(ucs_status_t status) #endif } +static inline void mca_spml_ucx_remote_op_posted(mca_spml_ucx_ctx_t *ctx, int dst) +{ + if (OPAL_UNLIKELY(mca_spml_ucx.synchronized_quiet)) { + if (!opal_bitmap_is_set_bit(&ctx->put_op_bitmap, dst)) { + ctx->put_proc_indexes[ctx->put_proc_count++] = dst; + opal_bitmap_set_bit(&ctx->put_op_bitmap, dst); + } + } +} + #define MCA_SPML_UCX_CTXS_ARRAY_SIZE 64 #define MCA_SPML_UCX_CTXS_ARRAY_INC 64 diff --git a/oshmem/mca/spml/ucx/spml_ucx_component.c b/oshmem/mca/spml/ucx/spml_ucx_component.c index 900349f9e6f..0f0ce2a15a3 100644 --- a/oshmem/mca/spml/ucx/spml_ucx_component.c +++ b/oshmem/mca/spml/ucx/spml_ucx_component.c @@ -128,6 +128,10 @@ static int mca_spml_ucx_component_register(void) "Asynchronous progress tick granularity (in usec)", &mca_spml_ucx.async_tick); + mca_spml_ucx_param_register_bool("synchronized_quiet", 0, + "Use synchronized quiet on shmem_quiet or shmem_barrier_all operations", + &mca_spml_ucx.synchronized_quiet); + opal_common_ucx_mca_var_register(&mca_spml_ucx_component.spmlm_version); return OSHMEM_SUCCESS; @@ -329,6 +333,7 @@ static void _ctx_cleanup(mca_spml_ucx_ctx_t *ctx) mca_spml_ucx.num_disconnect, ctx->ucp_worker); free(del_procs); + mca_spml_ucx_clear_put_op_mask(ctx); free(ctx->ucp_peers); } From 5f79dfaa0ae90b7a285b9d8f81b1355168206c87 Mon Sep 17 00:00:00 2001 From: Mark Allen Date: Wed, 27 Mar 2019 14:12:20 -0400 Subject: [PATCH 13/94] shmat/shmdt additions for patcher This is mostly based off recent UCX additions to their patcher: https://github.com/openucx/ucx/pull/2703 They added triggers for * mmap when (flags & MAP_FIXED) && (addr != NULL) * shmat when (shmflg & SHM_REMAP) && (shmaddr != NULL) Beyond that I noticed they already had a trigger for * madvise when (advice == MADV_FREE) that we didn't so I added that. 
And the other main thing is we didn't really have shmat/shmdt active for some systems because we only had a path for syscall(SYS_shmdt, ) but we needed to also have a path for syscall(SYS_ipc, IPCOP_shmdt, ) and same for shmat. Signed-off-by: Mark Allen (cherry picked from commit eb888118e83f56c131aff900b03eab34c92b7805) --- .../memory/patcher/memory_patcher_component.c | 122 ++++++++++++++---- 1 file changed, 98 insertions(+), 24 deletions(-) diff --git a/opal/mca/memory/patcher/memory_patcher_component.c b/opal/mca/memory/patcher/memory_patcher_component.c index bf676dbdca9..5db3a6016f8 100644 --- a/opal/mca/memory/patcher/memory_patcher_component.c +++ b/opal/mca/memory/patcher/memory_patcher_component.c @@ -15,7 +15,7 @@ * reserved. * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2019 IBM Corporation. All rights reserved. * * $COPYRIGHT$ * @@ -48,6 +48,9 @@ #if defined(HAVE_LINUX_MMAN_H) #include #endif +#if defined(HAVE_SYS_IPC_H) +#include +#endif #include "memory_patcher.h" #undef opal_memory_changed @@ -104,15 +107,7 @@ opal_memory_patcher_component_t mca_memory_patcher_component = { * data. If this can be resolved the two levels can be joined. */ -/* - * The following block of code is #if 0'ed out because we do not need - * to intercept mmap() any more (mmap() only deals with memory - * protection; it does not invalidate any rcache entries for a given - * region). But if we do someday, this is the code that we'll need. - * It's a little non-trivial, so we might as well keep it (and #if 0 - * it out). 
- */ -#if 0 +#if defined (SYS_mmap) #if defined(HAVE___MMAP) && !HAVE_DECL___MMAP /* prototype for Apple's internal mmap function */ @@ -121,12 +116,11 @@ void *__mmap (void *start, size_t length, int prot, int flags, int fd, off_t off static void *(*original_mmap)(void *, size_t, int, int, int, off_t); -static void *intercept_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) +static void *_intercept_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) { - OPAL_PATCHER_BEGIN; void *result = 0; - if (prot == PROT_NONE) { + if ((flags & MAP_FIXED) && (start != NULL)) { opal_mem_hooks_release_hook (start, length, true); } @@ -137,19 +131,20 @@ static void *intercept_mmap(void *start, size_t length, int prot, int flags, int #else result = (void*)(intptr_t) memory_patcher_syscall(SYS_mmap, start, length, prot, flags, fd, offset); #endif - - // I thought we had some issue in the past with the above line for IA32, - // like maybe syscall() wouldn't handle that many arguments. But just now - // I used gcc -m32 and it worked on a recent system. But there's a possibility - // that older ia32 systems may need some other code to make the above syscall. 
} else { result = original_mmap (start, length, prot, flags, fd, offset); } - OPAL_PATCHER_END; return result; } +static void *intercept_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) +{ + OPAL_PATCHER_BEGIN; + void *result = _intercept_mmap (start, length, prot, flags, fd, offset); + OPAL_PATCHER_END; + return result; +} #endif #if defined (SYS_munmap) @@ -256,6 +251,9 @@ static int _intercept_madvise (void *start, size_t length, int advice) int result = 0; if (advice == MADV_DONTNEED || +#ifdef MADV_FREE + advice == MADV_FREE || +#endif #ifdef MADV_REMOVE advice == MADV_REMOVE || #endif @@ -341,7 +339,12 @@ static int intercept_brk (void *addr) #endif -#if defined(SYS_shmdt) && defined(__linux__) +#define HAS_SHMDT (defined(SYS_shmdt) || \ + (defined(IPCOP_shmdt) && defined(SYS_ipc))) +#define HAS_SHMAT (defined(SYS_shmat) || \ + (defined(IPCOP_shmat) && defined(SYS_ipc))) + +#if (HAS_SHMDT || HAS_SHMAT) && defined(__linux__) #include #include @@ -404,6 +407,68 @@ static size_t memory_patcher_get_shm_seg_size (const void *shmaddr) return seg_size; } +static size_t get_shm_size(int shmid) +{ + struct shmid_ds ds; + int ret; + + ret = shmctl(shmid, IPC_STAT, &ds); + if (ret < 0) { + return 0; + } + + return ds.shm_segsz; +} +#endif + +#if HAS_SHMAT && defined(__linux__) +static void *(*original_shmat)(int shmid, const void *shmaddr, int shmflg); + +static void *_intercept_shmat(int shmid, const void *shmaddr, int shmflg) +{ + void *result = 0; + + size_t size = get_shm_size(shmid); + + if ((shmflg & SHM_REMAP) && (shmaddr != NULL)) { +// I don't really know what REMAP combined with SHM_RND does, so I'll just +// guess it remaps all the way down to the lower attach_addr, and all the +// way up to the original shmaddr+size + uintptr_t attach_addr = (uintptr_t)shmaddr; + + if (shmflg & SHM_RND) { + attach_addr -= ((uintptr_t)shmaddr) % SHMLBA; + size += ((uintptr_t)shmaddr) % SHMLBA; + } + opal_mem_hooks_release_hook ((void*)attach_addr, 
size, false); + } + + if (!original_shmat) { +#if defined(SYS_shmat) + result = memory_patcher_syscall(SYS_shmat, shmid, shmaddr, shmflg); +#else // IPCOP_shmat + unsigned long ret; + ret = memory_patcher_syscall(SYS_ipc, IPCOP_shmat, + shmid, shmflg, &shmaddr, shmaddr); + result = (ret > -(unsigned long)SHMLBA) ? (void *)ret : (void *)shmaddr; +#endif + } else { + result = original_shmat (shmid, shmaddr, shmflg); + } + + return result; +} + +static void* intercept_shmat (int shmid, const void * shmaddr, int shmflg) +{ + OPAL_PATCHER_BEGIN; + void *result = _intercept_shmat (shmid, shmaddr, shmflg); + OPAL_PATCHER_END; + return result; +} +#endif + +#if HAS_SHMDT && defined(__linux__) static int (*original_shmdt) (const void *); static int _intercept_shmdt (const void *shmaddr) @@ -417,7 +482,11 @@ static int _intercept_shmdt (const void *shmaddr) if (original_shmdt) { result = original_shmdt (shmaddr); } else { +#if defined(SYS_shmdt) result = memory_patcher_syscall (SYS_shmdt, shmaddr); +#else // IPCOP_shmdt + result = memory_patcher_syscall(SYS_ipc, IPCOP_shmdt, 0, 0, 0, shmaddr); +#endif } return result; @@ -478,9 +547,7 @@ static int patcher_open (void) /* set memory hooks support level */ opal_mem_hooks_set_support (OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT); -#if 0 - /* See above block to see why mmap() functionality is #if 0'ed - out */ +#if defined (SYS_mmap) rc = opal_patcher->patch_symbol ("mmap", (uintptr_t) intercept_mmap, (uintptr_t *) &original_mmap); if (OPAL_SUCCESS != rc) { return rc; @@ -508,7 +575,14 @@ static int patcher_open (void) } #endif -#if defined(SYS_shmdt) && defined(__linux__) +#if HAS_SHMAT && defined(__linux__) + rc = opal_patcher->patch_symbol ("shmat", (uintptr_t) intercept_shmat, (uintptr_t *) &original_shmat); + if (OPAL_SUCCESS != rc) { + return rc; + } +#endif + +#if HAS_SHMDT && defined(__linux__) rc = opal_patcher->patch_symbol ("shmdt", (uintptr_t) intercept_shmdt, (uintptr_t *) &original_shmdt); if 
(OPAL_SUCCESS != rc) { return rc; From cadf315ca9fe9bb12485fcb17359e050289cc784 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Fri, 17 May 2019 15:10:06 -0400 Subject: [PATCH 14/94] Fixed SPC/MPI_T initialization error. Signed-off-by: Yong Qin --- ompi/runtime/ompi_spc.c | 61 +++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/ompi/runtime/ompi_spc.c b/ompi/runtime/ompi_spc.c index caee2cda6f2..15ca42aa791 100644 --- a/ompi/runtime/ompi_spc.c +++ b/ompi/runtime/ompi_spc.c @@ -276,7 +276,7 @@ void ompi_spc_events_init(void) */ void ompi_spc_init(void) { - int i, j, ret, found = 0, all_on = 0; + int i, j, ret, found = 0, all_on = 0, matched = 0; /* Initialize the clock frequency variable as the CPU's frequency in MHz */ sys_clock_freq_mhz = opal_timer_base_get_freq() / 1000000; @@ -287,6 +287,14 @@ void ompi_spc_init(void) char **arg_strings = opal_argv_split(ompi_mpi_spc_attach_string, ','); int num_args = opal_argv_count(arg_strings); + /* Reset all timer-based counters */ + for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) { + CLEAR_SPC_BIT(ompi_spc_timer_event, i); + } + + /* If this is a timer event, set the corresponding timer_event entry */ + SET_SPC_BIT(ompi_spc_timer_event, OMPI_SPC_MATCH_TIME); + /* If there is only one argument and it is 'all', then all counters * should be turned on. If the size is 0, then no counters will be enabled. 
*/ @@ -299,49 +307,44 @@ void ompi_spc_init(void) /* Turn on only the counters that were specified in the MCA parameter */ for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) { if(all_on) { - SET_SPC_BIT(ompi_spc_attached_event, i); - mpi_t_enabled = true; found++; } else { + matched = 0; /* Note: If no arguments were given, this will be skipped */ for(j = 0; j < num_args; j++) { if( 0 == strcmp(ompi_spc_events_names[i].counter_name, arg_strings[j]) ) { - SET_SPC_BIT(ompi_spc_attached_event, i); - mpi_t_enabled = true; found++; + matched = 1; break; } } } - /* ######################################################################## - * ################## Add Timer-Based Counter Enums Here ################## - * ######################################################################## - */ - CLEAR_SPC_BIT(ompi_spc_timer_event, i); - - /* Registers the current counter as an MPI_T pvar regardless of whether it's been turned on or not */ - ret = mca_base_pvar_register("ompi", "runtime", "spc", ompi_spc_events_names[i].counter_name, ompi_spc_events_names[i].counter_description, - OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, - MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, MPI_T_BIND_NO_OBJECT, - MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, - ompi_spc_get_count, NULL, ompi_spc_notify, NULL); + if (all_on || matched) { + SET_SPC_BIT(ompi_spc_attached_event, i); + mpi_t_enabled = true; - /* Check to make sure that ret is a valid index and not an error code. 
- */ - if( ret >= 0 ) { - if( mpi_t_offset == -1 ) { - mpi_t_offset = ret; + /* Registers the current counter as an MPI_T pvar regardless of whether it's been turned on or not */ + ret = mca_base_pvar_register("ompi", "runtime", "spc", ompi_spc_events_names[i].counter_name, ompi_spc_events_names[i].counter_description, + OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, + MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, MPI_T_BIND_NO_OBJECT, + MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, + ompi_spc_get_count, NULL, ompi_spc_notify, NULL); + + /* Check to make sure that ret is a valid index and not an error code */ + if( ret >= 0 ) { + if( mpi_t_offset == -1 ) { + mpi_t_offset = ret; + } + } + if( (ret < 0) || (ret != (mpi_t_offset + found - 1)) ) { + mpi_t_enabled = false; + opal_show_help("help-mpi-runtime.txt", "spc: MPI_T disabled", true); + break; } - } - if( (ret < 0) || (ret != (mpi_t_offset + found - 1)) ) { - mpi_t_enabled = false; - opal_show_help("help-mpi-runtime.txt", "spc: MPI_T disabled", true); - break; } } - /* If this is a timer event, sent the corresponding timer_event entry to 1 */ - SET_SPC_BIT(ompi_spc_timer_event, OMPI_SPC_MATCH_TIME); + opal_argv_free(arg_strings); } From dbf89404d768046b4c79c403fa07d8ae8b28ab8d Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Fri, 17 May 2019 15:32:17 -0400 Subject: [PATCH 15/94] Fix the SPC initialization. Use the PVAR ctx to save the SPC index, so that no lookup nor restriction on the SPC vars position is imposed. Make sure the PVAR are always registered. 
Signed-off-by: George Bosilca --- ompi/runtime/ompi_spc.c | 90 +++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 52 deletions(-) diff --git a/ompi/runtime/ompi_spc.c b/ompi/runtime/ompi_spc.c index 15ca42aa791..d88f290aaaf 100644 --- a/ompi/runtime/ompi_spc.c +++ b/ompi/runtime/ompi_spc.c @@ -1,11 +1,13 @@ /* - * Copyright (c) 2018 The University of Tennessee and The University + * Copyright (c) 2018-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * * Copyright (c) 2018 Cisco Systems, Inc. All rights reserved * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Mellanox Technologies, Inc. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,10 +22,8 @@ opal_timer_t sys_clock_freq_mhz = 0; static void ompi_spc_dump(void); /* Array for converting from SPC indices to MPI_T indices */ -OMPI_DECLSPEC int mpi_t_offset = -1; -OMPI_DECLSPEC bool mpi_t_enabled = false; - -OPAL_DECLSPEC ompi_communicator_t *comm = NULL; +static bool mpi_t_enabled = false; +static ompi_communicator_t *ompi_spc_comm = NULL; typedef struct ompi_spc_event_t { const char* counter_name; @@ -185,6 +185,8 @@ static int ompi_spc_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, v return MPI_SUCCESS; } + index = (int)(uintptr_t)pvar->ctx; /* Convert from MPI_T pvar index to SPC index */ + /* For this event, we need to set count to the number of long long type * values for this counter. All SPC counters are one long long, so we * always set count to 1. 
@@ -194,14 +196,10 @@ static int ompi_spc_notify(mca_base_pvar_t *pvar, mca_base_pvar_event_t event, v } /* For this event, we need to turn on the counter */ else if(MCA_BASE_PVAR_HANDLE_START == event) { - /* Convert from MPI_T pvar index to SPC index */ - index = pvar->pvar_index - mpi_t_offset; SET_SPC_BIT(ompi_spc_attached_event, index); } /* For this event, we need to turn off the counter */ else if(MCA_BASE_PVAR_HANDLE_STOP == event) { - /* Convert from MPI_T pvar index to SPC index */ - index = pvar->pvar_index - mpi_t_offset; CLEAR_SPC_BIT(ompi_spc_attached_event, index); } @@ -231,7 +229,7 @@ static int ompi_spc_get_count(const struct mca_base_pvar_t *pvar, void *value, v } /* Convert from MPI_T pvar index to SPC index */ - int index = pvar->pvar_index - mpi_t_offset; + int index = (int)(uintptr_t)pvar->ctx; /* Set the counter value to the current SPC value */ *counter_value = (long long)ompi_spc_events[index].value; /* If this is a timer-based counter, convert from cycles to microseconds */ @@ -268,7 +266,7 @@ void ompi_spc_events_init(void) ompi_spc_events[i].value = 0; } - ompi_comm_dup(&ompi_mpi_comm_world.comm, &comm); + ompi_comm_dup(&ompi_mpi_comm_world.comm, &ompi_spc_comm); } /* Initializes the SPC data structures and registers all counters as MPI_T pvars. @@ -287,14 +285,6 @@ void ompi_spc_init(void) char **arg_strings = opal_argv_split(ompi_mpi_spc_attach_string, ','); int num_args = opal_argv_count(arg_strings); - /* Reset all timer-based counters */ - for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) { - CLEAR_SPC_BIT(ompi_spc_timer_event, i); - } - - /* If this is a timer event, set the corresponding timer_event entry */ - SET_SPC_BIT(ompi_spc_timer_event, OMPI_SPC_MATCH_TIME); - /* If there is only one argument and it is 'all', then all counters * should be turned on. If the size is 0, then no counters will be enabled. 
*/ @@ -304,47 +294,43 @@ void ompi_spc_init(void) } } - /* Turn on only the counters that were specified in the MCA parameter */ for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) { - if(all_on) { - found++; - } else { - matched = 0; - /* Note: If no arguments were given, this will be skipped */ + /* Reset all timer-based counters */ + CLEAR_SPC_BIT(ompi_spc_timer_event, i); + matched = all_on; + + if( !matched ) { + /* Turn on only the counters that were specified in the MCA parameter */ for(j = 0; j < num_args; j++) { if( 0 == strcmp(ompi_spc_events_names[i].counter_name, arg_strings[j]) ) { - found++; matched = 1; break; } } } - if (all_on || matched) { + if (matched) { SET_SPC_BIT(ompi_spc_attached_event, i); mpi_t_enabled = true; + found++; + } - /* Registers the current counter as an MPI_T pvar regardless of whether it's been turned on or not */ - ret = mca_base_pvar_register("ompi", "runtime", "spc", ompi_spc_events_names[i].counter_name, ompi_spc_events_names[i].counter_description, - OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, - MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, MPI_T_BIND_NO_OBJECT, - MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, - ompi_spc_get_count, NULL, ompi_spc_notify, NULL); - - /* Check to make sure that ret is a valid index and not an error code */ - if( ret >= 0 ) { - if( mpi_t_offset == -1 ) { - mpi_t_offset = ret; - } - } - if( (ret < 0) || (ret != (mpi_t_offset + found - 1)) ) { - mpi_t_enabled = false; - opal_show_help("help-mpi-runtime.txt", "spc: MPI_T disabled", true); - break; - } + /* Registers the current counter as an MPI_T pvar regardless of whether it's been turned on or not */ + ret = mca_base_pvar_register("ompi", "runtime", "spc", ompi_spc_events_names[i].counter_name, ompi_spc_events_names[i].counter_description, + OPAL_INFO_LVL_4, MPI_T_PVAR_CLASS_SIZE, + MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, MPI_T_BIND_NO_OBJECT, + MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, + ompi_spc_get_count, NULL, 
ompi_spc_notify, (void*)(uintptr_t)i); + if( ret < 0 ) { + mpi_t_enabled = false; + opal_show_help("help-mpi-runtime.txt", "spc: MPI_T disabled", true); + break; } } + /* If this is a timer event, set the corresponding timer_event entry */ + SET_SPC_BIT(ompi_spc_timer_event, OMPI_SPC_MATCH_TIME); + opal_argv_free(arg_strings); } @@ -356,8 +342,8 @@ static void ompi_spc_dump(void) int i, j, world_size, offset; long long *recv_buffer = NULL, *send_buffer; - int rank = ompi_comm_rank(comm); - world_size = ompi_comm_size(comm); + int rank = ompi_comm_rank(ompi_spc_comm); + world_size = ompi_comm_size(ompi_spc_comm); /* Convert from cycles to usecs before sending */ for(i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) { @@ -384,10 +370,10 @@ static void ompi_spc_dump(void) return; } } - (void)comm->c_coll->coll_gather(send_buffer, OMPI_SPC_NUM_COUNTERS, MPI_LONG_LONG, + (void)ompi_spc_comm->c_coll->coll_gather(send_buffer, OMPI_SPC_NUM_COUNTERS, MPI_LONG_LONG, recv_buffer, OMPI_SPC_NUM_COUNTERS, MPI_LONG_LONG, - 0, comm, - comm->c_coll->coll_gather_module); + 0, ompi_spc_comm, + ompi_spc_comm->c_coll->coll_gather_module); /* Once rank 0 has all of the information, print the aggregated counter values for each rank in order */ if(rank == 0) { @@ -413,7 +399,7 @@ static void ompi_spc_dump(void) } free(send_buffer); - comm->c_coll->coll_barrier(comm, comm->c_coll->coll_barrier_module); + ompi_spc_comm->c_coll->coll_barrier(ompi_spc_comm, ompi_spc_comm->c_coll->coll_barrier_module); } /* Frees any dynamically alocated OMPI SPC data structures */ @@ -424,7 +410,7 @@ void ompi_spc_fini(void) } free(ompi_spc_events); ompi_spc_events = NULL; - ompi_comm_free(&comm); + ompi_comm_free(&ompi_spc_comm); } /* Records an update to a counter using an atomic add operation. */ From a8d5da67db5db6e955de7b1c9974d6ba9291e27d Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 29 May 2019 00:19:52 -0400 Subject: [PATCH 16/94] Fix the man pages for some of the MPI_T_* functions. 
Signed-off-by: George Bosilca --- ompi/mpi/man/man3/MPI_T_cvar_handle_alloc.3in | 1 + ompi/mpi/man/man3/MPI_T_finalize.3in | 1 + ompi/mpi/man/man3/MPI_T_pvar_handle_alloc.3in | 24 ++++++++++++++----- .../man/man3/MPI_T_pvar_session_create.3in | 1 + ompi/mpi/man/man3/MPI_T_pvar_start.3in | 1 + ompi/mpi/man/man3/MPI_T_pvar_write.3in | 2 +- 6 files changed, 23 insertions(+), 7 deletions(-) diff --git a/ompi/mpi/man/man3/MPI_T_cvar_handle_alloc.3in b/ompi/mpi/man/man3/MPI_T_cvar_handle_alloc.3in index 87e5f9f2efe..fa393b5cf66 100644 --- a/ompi/mpi/man/man3/MPI_T_cvar_handle_alloc.3in +++ b/ompi/mpi/man/man3/MPI_T_cvar_handle_alloc.3in @@ -20,6 +20,7 @@ int MPI_T_cvar_handle_alloc(int \fIcvar_index\fP, void *\fIobj_handle\fP, int MPI_T_cvar_handle_free(MPI_T_cvar_handle *\fIhandle\fP) +.fi .SH DESCRIPTION .ft R MPI_T_cvar_handle_alloc binds the control variable specified in \fIcvar_index\fP to the MPI diff --git a/ompi/mpi/man/man3/MPI_T_finalize.3in b/ompi/mpi/man/man3/MPI_T_finalize.3in index ef7ec71824c..7cb2b7dce2c 100644 --- a/ompi/mpi/man/man3/MPI_T_finalize.3in +++ b/ompi/mpi/man/man3/MPI_T_finalize.3in @@ -15,6 +15,7 @@ #include int MPI_T_finalize(void) +.fi .SH DESCRIPTION .ft R MPI_T_finalize() finalizes the MPI tool information interface and must be called the same diff --git a/ompi/mpi/man/man3/MPI_T_pvar_handle_alloc.3in b/ompi/mpi/man/man3/MPI_T_pvar_handle_alloc.3in index 1c9c844f148..dfcd4d19f55 100644 --- a/ompi/mpi/man/man3/MPI_T_pvar_handle_alloc.3in +++ b/ompi/mpi/man/man3/MPI_T_pvar_handle_alloc.3in @@ -15,17 +15,28 @@ .SH C Syntax .nf #include -int MPI_T_pvar_handle_alloc(int \fIpvar_index\fP, void *\fIobj_handle\fP, +int MPI_T_pvar_handle_alloc(int \fIsession\fP, int \fIpvar_index\fP, void *\fIobj_handle\fP, MPI_T_pvar_handle *\fIhandle\fP, int *\fIcount\fP) -int MPI_T_pvar_handle_free(MPI_T_pvar_handle *\fIhandle\fP) +int MPI_T_pvar_handle_free(int \fIsession\fP, MPI_T_pvar_handle *\fIhandle\fP) .SH DESCRIPTION .ft R MPI_T_pvar_handle_alloc binds 
the performance variable specified in \fIpvar_index\fP to the MPI -object specified in \fIobj_handle\fP. If MPI_T_pvar_get_info returns MPI_T_BIND_NO_OBJECT -as the binding for the variable the \fIobj_handle\fP argument is ignored. The number of -values represented by this performance variable is returned in the \fIcount\fP parameter. +object specified in \fIobj_handle\fP in the session identified by the parameter +\fIsession\fP. The object is passed in the argument \fIobj_handle\fP as an +address to a local variable that stores the object’s handle. If +MPI_T_pvar_get_info returns MPI_T_BIND_NO_OBJECT as the binding +for the variable the \fIobj_handle\fP argument is ignored. The handle +allocated to reference the variable is returned in the argument \fIhandle\fP. Upon successful +return, \fIcount\fP contains the number of elements (of the datatype returned by a previous +MPI_T_PVAR_GET_INFO call) used to represent this variable. + +The value of \fIpvar_index\fP should be in the range 0 to \fInum_pvar - 1\fP, +where \fInum_pvar\fP is the number of available performance variables as +determined from a prior call to \fIMPI_T_PVAR_GET_NUM\fP. The type of the +MPI object it references must be consistent with the type returned in the +bind argument in a prior call to \fIMPI_T_PVAR_GET_INFO\fP. MPI_T_pvar_handle_free frees a handle allocated by MPI_T_pvar_handle_alloc and sets the \fIhandle\fP argument to MPI_T_PVAR_HANDLE_NULL. 
@@ -50,11 +61,12 @@ MPI_T_pvar_handle_free() will fail if: The MPI Tools interface not initialized .TP 1i [MPI_T_ERR_INVALID_HANDLE] -The handle is invalid +The handle is invalid or the handle argument passed in is not associated with the session argument .SH SEE ALSO .ft R .nf MPI_T_pvar_get_info +MPI_T_pvar_get_num diff --git a/ompi/mpi/man/man3/MPI_T_pvar_session_create.3in b/ompi/mpi/man/man3/MPI_T_pvar_session_create.3in index 52a91c6617e..dd46817f314 100644 --- a/ompi/mpi/man/man3/MPI_T_pvar_session_create.3in +++ b/ompi/mpi/man/man3/MPI_T_pvar_session_create.3in @@ -19,6 +19,7 @@ int MPI_T_pvar_session_create(MPI_T_pvar_session *\fIsession\fP) int MPI_T_pvar_session_free(MPI_T_pvar_session *\fIsession\fP) +.fi .SH DESCRIPTION .ft R MPI_T_pvar_session_create creates a session for accessing performance variables. The diff --git a/ompi/mpi/man/man3/MPI_T_pvar_start.3in b/ompi/mpi/man/man3/MPI_T_pvar_start.3in index 450638149aa..2b1c9830d9b 100644 --- a/ompi/mpi/man/man3/MPI_T_pvar_start.3in +++ b/ompi/mpi/man/man3/MPI_T_pvar_start.3in @@ -19,6 +19,7 @@ int MPI_T_pvar_start(MPI_T_pvar_session \fIsession\fP, MPI_T_pvar_handle \fIhand int MPI_T_pvar_stop(MPI_T_pvar_session \fIsession\fP, MPI_T_pvar_handle \fIhandle\fP) +.fi .SH INPUT PARAMETERS .ft R .TP 1i diff --git a/ompi/mpi/man/man3/MPI_T_pvar_write.3in b/ompi/mpi/man/man3/MPI_T_pvar_write.3in index daaf28c0ac8..944a93c8e2f 100644 --- a/ompi/mpi/man/man3/MPI_T_pvar_write.3in +++ b/ompi/mpi/man/man3/MPI_T_pvar_write.3in @@ -33,7 +33,7 @@ Initial address of storage location for variable value. .SH DESCRIPTION .ft R MPI_T_pvar_write attempts to set the value of the performance variable identified by -the handle specified in \fIhandle\fP in the session specified in \fPsession\fI. The +the handle specified in \fIhandle\fP in the session specified in \fIsession\fP. The value to be written is specified in \fIbuf\fP. 
The caller must ensure that the buffer specified in \fIbuf\fP is large enough to hold the entire value of the performance variable. From 4083800c1842ed7ef5c14f2df682d4f9c204b619 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 29 May 2019 00:54:56 -0400 Subject: [PATCH 17/94] Use the correct counter name in the example. Signed-off-by: George Bosilca --- test/spc/spc_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/spc/spc_test.c b/test/spc/spc_test.c index a15d87dc95a..65dc744435f 100644 --- a/test/spc/spc_test.c +++ b/test/spc/spc_test.c @@ -44,8 +44,8 @@ int main(int argc, char **argv) char name[256], description[256]; /* Counter names to be read by ranks 0 and 1 */ - char *counter_names[] = { "runtime_spc_OMPI_BYTES_SENT_USER", - "runtime_spc_OMPI_BYTES_RECEIVED_USER" }; + char *counter_names[] = { "runtime_spc_OMPI_SPC_BYTES_SENT_USER", + "runtime_spc_OMPI_SPC_BYTES_RECEIVED_USER" }; MPI_Init(NULL, NULL); MPI_result = MPI_T_init_thread(MPI_THREAD_SINGLE, &provided); From 6c2cd10d684ec5a3a0d77cc5767d491dc9786e6f Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 4 Jun 2019 09:49:01 -0700 Subject: [PATCH 18/94] Fix tree spawn at scale Remove the debruijn component as it changes the daemon's parent process ID, thus breaking the other routed components Signed-off-by: Ralph Castain --- orte/mca/routed/debruijn/Makefile.am | 41 -- orte/mca/routed/debruijn/owner.txt | 7 - orte/mca/routed/debruijn/routed_debruijn.c | 481 ------------------ orte/mca/routed/debruijn/routed_debruijn.h | 27 - .../debruijn/routed_debruijn_component.c | 55 -- 5 files changed, 611 deletions(-) delete mode 100644 orte/mca/routed/debruijn/Makefile.am delete mode 100644 orte/mca/routed/debruijn/owner.txt delete mode 100644 orte/mca/routed/debruijn/routed_debruijn.c delete mode 100644 orte/mca/routed/debruijn/routed_debruijn.h delete mode 100644 orte/mca/routed/debruijn/routed_debruijn_component.c diff --git a/orte/mca/routed/debruijn/Makefile.am 
b/orte/mca/routed/debruijn/Makefile.am deleted file mode 100644 index 2a90f6989b7..00000000000 --- a/orte/mca/routed/debruijn/Makefile.am +++ /dev/null @@ -1,41 +0,0 @@ -# -# Copyright (c) 2007-2012 Los Alamos National Security, LLC. -# All rights reserved. -# Copyright (c) 2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - routed_debruijn.h \ - routed_debruijn.c \ - routed_debruijn_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_routed_debruijn_DSO -component_noinst = -component_install = mca_routed_debruijn.la -else -component_noinst = libmca_routed_debruijn.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_routed_debruijn_la_SOURCES = $(sources) -mca_routed_debruijn_la_LDFLAGS = -module -avoid-version -mca_routed_debruijn_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_routed_debruijn_la_SOURCES = $(sources) -libmca_routed_debruijn_la_LDFLAGS = -module -avoid-version - diff --git a/orte/mca/routed/debruijn/owner.txt b/orte/mca/routed/debruijn/owner.txt deleted file mode 100644 index b4ba3c21f5e..00000000000 --- a/orte/mca/routed/debruijn/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: LANL? 
-status: unmaintained diff --git a/orte/mca/routed/debruijn/routed_debruijn.c b/orte/mca/routed/debruijn/routed_debruijn.c deleted file mode 100644 index 4545fcae779..00000000000 --- a/orte/mca/routed/debruijn/routed_debruijn.c +++ /dev/null @@ -1,481 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2007-2012 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include - -#include "opal/dss/dss.h" -#include "opal/class/opal_hash_table.h" -#include "opal/class/opal_bitmap.h" -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ess/ess.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/rml_types.h" -#include "orte/util/name_fns.h" -#include "orte/runtime/orte_globals.h" -#include "orte/runtime/orte_wait.h" -#include "orte/runtime/runtime.h" -#include "orte/runtime/data_type_support/orte_dt_support.h" - -#include "orte/mca/rml/base/rml_contact.h" - -#include "orte/mca/routed/base/base.h" -#include "routed_debruijn.h" - - -static int init(void); -static int finalize(void); -static int delete_route(orte_process_name_t *proc); -static int update_route(orte_process_name_t *target, - orte_process_name_t *route); -static orte_process_name_t get_route(orte_process_name_t *target); -static int route_lost(const orte_process_name_t *route); -static bool route_is_defined(const orte_process_name_t *target); -static void update_routing_plan(void); -static void get_routing_list(opal_list_t *coll); -static int set_lifeline(orte_process_name_t *proc); -static size_t num_routes(void); - -#if OPAL_ENABLE_FT_CR == 1 -static int debruijn_ft_event(int state); 
-#endif - -orte_routed_module_t orte_routed_debruijn_module = { - .initialize = init, - .finalize = finalize, - .delete_route = delete_route, - .update_route = update_route, - .get_route = get_route, - .route_lost = route_lost, - .route_is_defined = route_is_defined, - .set_lifeline = set_lifeline, - .update_routing_plan = update_routing_plan, - .get_routing_list = get_routing_list, - .num_routes = num_routes, -#if OPAL_ENABLE_FT_CR == 1 - .ft_event = debruijn_ft_event -#else - NULL -#endif -}; - -/* local globals */ -static orte_process_name_t *lifeline=NULL; -static orte_process_name_t local_lifeline; -static opal_list_t my_children; -static bool hnp_direct=true; -static int log_nranks; -static int log_npeers; -static unsigned int rank_mask; - -static int init(void) -{ - lifeline = NULL; - - if (ORTE_PROC_IS_DAEMON) { - /* if we are using static ports, set my lifeline to point at my parent */ - if (orte_static_ports) { - lifeline = ORTE_PROC_MY_PARENT; - } else { - /* set our lifeline to the HNP - we will abort if that connection is lost */ - lifeline = ORTE_PROC_MY_HNP; - } - ORTE_PROC_MY_PARENT->jobid = ORTE_PROC_MY_NAME->jobid; - } else if (ORTE_PROC_IS_APP) { - /* if we don't have a designated daemon, just - * disqualify ourselves */ - if (NULL == orte_process_info.my_daemon_uri) { - return ORTE_ERR_TAKE_NEXT_OPTION; - } - /* set our lifeline to the local daemon - we will abort if this connection is lost */ - lifeline = ORTE_PROC_MY_DAEMON; - orte_routing_is_enabled = true; - } - - /* setup the list of children */ - OBJ_CONSTRUCT(&my_children, opal_list_t); - - return ORTE_SUCCESS; -} - -static int finalize(void) -{ - opal_list_item_t *item; - - lifeline = NULL; - - /* deconstruct the list of children */ - while (NULL != (item = opal_list_remove_first(&my_children))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&my_children); - - return ORTE_SUCCESS; -} - -static int delete_route(orte_process_name_t *proc) -{ - if (proc->jobid == ORTE_JOBID_INVALID || - 
proc->vpid == ORTE_VPID_INVALID) { - return ORTE_ERR_BAD_PARAM; - } - - /* if I am an application process, I don't have any routes - * so there is nothing for me to do - */ - if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON && - !ORTE_PROC_IS_TOOL) { - return ORTE_SUCCESS; - } - - OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output, - "%s routed_debruijn_delete_route for %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc))); - - /* THIS CAME FROM OUR OWN JOB FAMILY...there is nothing - * to do here. The routes will be redefined when we update - * the routing tree - */ - - return ORTE_SUCCESS; -} - -static int update_route(orte_process_name_t *target, - orte_process_name_t *route) -{ - if (target->jobid == ORTE_JOBID_INVALID || - target->vpid == ORTE_VPID_INVALID) { - return ORTE_ERR_BAD_PARAM; - } - - /* if I am an application process, we don't update the route since - * we automatically route everything through the local daemon - */ - if (ORTE_PROC_IS_APP) { - return ORTE_SUCCESS; - } - - OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output, - "%s routed_debruijn_update: %s --> %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(target), - ORTE_NAME_PRINT(route))); - - - /* if I am a daemon and the target is my HNP, then check - * the route - if it isn't direct, then we just flag that - * we have a route to the HNP - */ - if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target) && - OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, route)) { - hnp_direct = false; - return ORTE_SUCCESS; - } - - return ORTE_SUCCESS; -} - -static inline unsigned int debruijn_next_hop (int target) -{ - const int my_id = ORTE_PROC_MY_NAME->vpid; - uint64_t route, mask = rank_mask; - unsigned int i, next_hop; - - if (target == my_id) { - return my_id; - } - - i = -log_npeers; - do { - i += log_npeers; - mask = (mask >> i) << i; - route = (my_id << i) | target; - } while 
((route & mask) != (((my_id << i) & target) & mask)); - - next_hop = (int)((route >> (i - log_npeers)) & rank_mask); - - /* if the next hop does not exist route to the lowest proc with the same lower routing bits */ - return (next_hop < orte_process_info.num_procs) ? next_hop : (next_hop & (rank_mask >> log_npeers)); -} - -static orte_process_name_t get_route(orte_process_name_t *target) -{ - orte_process_name_t ret; - - /* initialize */ - - do { - ret = *ORTE_NAME_INVALID; - - if (ORTE_JOBID_INVALID == target->jobid || - ORTE_VPID_INVALID == target->vpid) { - break; - } - - /* if it is me, then the route is just direct */ - if (OPAL_EQUAL == opal_dss.compare(ORTE_PROC_MY_NAME, target, ORTE_NAME)) { - ret = *target; - break; - } - - /* if I am an application process, always route via my local daemon */ - if (ORTE_PROC_IS_APP) { - ret = *ORTE_PROC_MY_DAEMON; - break; - } - - /* if I am a tool, the route is direct if target is in - * my own job family, and to the target's HNP if not - */ - if (ORTE_PROC_IS_TOOL) { - if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) { - ret = *target; - } else { - ORTE_HNP_NAME_FROM_JOB(&ret, target->jobid); - } - - break; - } - - /****** HNP AND DAEMONS ONLY ******/ - - if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target)) { - if (!hnp_direct || orte_static_ports) { - OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output, - "%s routing to the HNP through my parent %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(ORTE_PROC_MY_PARENT))); - ret = *ORTE_PROC_MY_PARENT; - } else { - OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output, - "%s routing direct to the HNP", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - ret = *ORTE_PROC_MY_HNP; - } - - break; - } - - ret.jobid = ORTE_PROC_MY_NAME->jobid; - /* find out what daemon hosts this proc */ - if (ORTE_VPID_INVALID == (ret.vpid = orte_get_proc_daemon_vpid(target))) { - /* we don't 
yet know about this daemon. just route this to the "parent" */ - ret = *ORTE_PROC_MY_PARENT; - break; - } - - /* if the daemon is me, then send direct to the target! */ - if (ORTE_PROC_MY_NAME->vpid == ret.vpid) { - ret = *target; - break; - } - - /* find next hop */ - ret.vpid = debruijn_next_hop (ret.vpid); - } while (0); - - OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output, - "%s routed_debruijn_get(%s) --> %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(target), - ORTE_NAME_PRINT(&ret))); - - return ret; -} - -static int route_lost(const orte_process_name_t *route) -{ - opal_list_item_t *item; - orte_routed_tree_t *child; - - OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output, - "%s route to %s lost", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(route))); - - /* if we lose the connection to the lifeline and we are NOT already, - * in finalize, tell the OOB to abort. - * NOTE: we cannot call abort from here as the OOB needs to first - * release a thread-lock - otherwise, we will hang!! 
- */ - if (!orte_finalizing && - NULL != lifeline && - OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route, lifeline)) { - OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output, - "%s routed:debruijn: Connection to lifeline %s lost", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(lifeline))); - return ORTE_ERR_FATAL; - } - - /* if we are the HNP or daemon, and the route is a daemon, - * see if it is one of our children - if so, remove it - */ - if ((ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) && - route->jobid == ORTE_PROC_MY_NAME->jobid) { - for (item = opal_list_get_first(&my_children); - item != opal_list_get_end(&my_children); - item = opal_list_get_next(item)) { - child = (orte_routed_tree_t*)item; - if (child->vpid == route->vpid) { - opal_list_remove_item(&my_children, item); - OBJ_RELEASE(item); - return ORTE_SUCCESS; - } - } - } - - /* we don't care about this one, so return success */ - return ORTE_SUCCESS; -} - -static bool route_is_defined(const orte_process_name_t *target) -{ - /* find out what daemon hosts this proc */ - if (ORTE_VPID_INVALID == orte_get_proc_daemon_vpid((orte_process_name_t*)target)) { - return false; - } - - return true; -} - -static int set_lifeline(orte_process_name_t *proc) -{ - /* we have to copy the proc data because there is no - * guarantee that it will be preserved - */ - local_lifeline.jobid = proc->jobid; - local_lifeline.vpid = proc->vpid; - lifeline = &local_lifeline; - - return ORTE_SUCCESS; -} - -static unsigned int ilog2 (unsigned int v) -{ - const unsigned int b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000}; - const unsigned int S[] = {1, 2, 4, 8, 16}; - int i; - - register unsigned int r = 0; - for (i = 4; i >= 0; i--) { - if (v & b[i]) { - v >>= S[i]; - r |= S[i]; - } - } - - return r; -} - -static void update_routing_plan(void) -{ - orte_routed_tree_t *child; - opal_list_item_t *item; - int my_vpid = ORTE_PROC_MY_NAME->vpid; - int i; - - /* if I am anything other than a daemon or the 
HNP, this - * is a meaningless command as I am not allowed to route - */ - if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) { - return; - } - - /* clear the list of children if any are already present */ - while (NULL != (item = opal_list_remove_first(&my_children))) { - OBJ_RELEASE(item); - } - - log_nranks = (int) ilog2 ((unsigned int)orte_process_info.num_procs) ; - assert(log_nranks < 31); - - if (log_nranks < 3) { - log_npeers = 1; - } else if (log_nranks < 7) { - log_npeers = 2; - } else { - log_npeers = 4; - } - - /* round log_nranks to a multiple of log_npeers */ - log_nranks = ((log_nranks + log_npeers) & ~(log_npeers - 1)) - 1; - - rank_mask = (1 << (log_nranks + 1)) - 1; - - /* compute my parent */ - ORTE_PROC_MY_PARENT->vpid = my_vpid ? my_vpid >> log_npeers : -1; - - /* only add peers to the routing tree if this rank is the smallest rank that will send to - the any peer */ - if ((my_vpid >> (log_nranks + 1 - log_npeers)) == 0) { - for (i = (1 << log_npeers) - 1 ; i >= 0 ; --i) { - int next = ((my_vpid << log_npeers) | i) & rank_mask; - - /* add a peer to the routing tree only if its vpid is smaller than this rank */ - if (next > my_vpid && next < (int)orte_process_info.num_procs) { - child = OBJ_NEW(orte_routed_tree_t); - child->vpid = next; - opal_list_append (&my_children, &child->super); - } - } - } -} - -static void get_routing_list(opal_list_t *coll) -{ - /* if I am anything other than a daemon or the HNP, this - * is a meaningless command as I am not allowed to route - */ - if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) { - return; - } - - orte_routed_base_xcast_routing(coll, &my_children); -} - -static size_t num_routes(void) -{ - return opal_list_get_size(&my_children); -} - -#if OPAL_ENABLE_FT_CR == 1 -static int debruijn_ft_event(int state) -{ - int ret, exit_status = ORTE_SUCCESS; - - /******** Checkpoint Prep ********/ - if(OPAL_CRS_CHECKPOINT == state) { - } - /******** Continue Recovery ********/ - else if (OPAL_CRS_CONTINUE == state ) { - 
} - else if (OPAL_CRS_TERM == state ) { - /* Nothing */ - } - else { - /* Error state = Nothing */ - } - - cleanup: - return exit_status; -} -#endif - diff --git a/orte/mca/routed/debruijn/routed_debruijn.h b/orte/mca/routed/debruijn/routed_debruijn.h deleted file mode 100644 index 303b1fa9b1f..00000000000 --- a/orte/mca/routed/debruijn/routed_debruijn.h +++ /dev/null @@ -1,27 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2007-2012 Los Alamos National Security, LLC. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_ROUTED_DEBRUIJN_H -#define MCA_ROUTED_DEBRUIJN_H - -#include "orte_config.h" - -#include "orte/mca/routed/routed.h" - -BEGIN_C_DECLS - -ORTE_MODULE_DECLSPEC extern orte_routed_component_t mca_routed_debruijn_component; - -extern orte_routed_module_t orte_routed_debruijn_module; - -END_C_DECLS - -#endif diff --git a/orte/mca/routed/debruijn/routed_debruijn_component.c b/orte/mca/routed/debruijn/routed_debruijn_component.c deleted file mode 100644 index c16d014c23c..00000000000 --- a/orte/mca/routed/debruijn/routed_debruijn_component.c +++ /dev/null @@ -1,55 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2007-2015 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2004-2008 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/base/base.h" - -#include "orte/mca/routed/base/base.h" -#include "routed_debruijn.h" - -static int orte_routed_debruijn_component_query(mca_base_module_t **module, int *priority); - -/** - * component definition - */ -orte_routed_component_t mca_routed_debruijn_component = { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - .base_version = { - ORTE_ROUTED_BASE_VERSION_3_0_0, - - .mca_component_name = "debruijn", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - .mca_query_component = orte_routed_debruijn_component_query - }, - .base_data = { - /* This component can be checkpointed */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int orte_routed_debruijn_component_query(mca_base_module_t **module, int *priority) -{ - /* Debruijn shall be our default, especially for large systems. 
For smaller - * systems, we will allow other options that have even fewer hops to - * support wireup - */ - *priority = 10; - *module = (mca_base_module_t *) &orte_routed_debruijn_module; - return ORTE_SUCCESS; -} From e07f1275764797a2b8920efbc819d651d157327a Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 4 Jun 2019 09:50:46 -0700 Subject: [PATCH 19/94] Ignore generated file Signed-off-by: Ralph Castain --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index d40aac1e60f..50cddfeacb7 100644 --- a/.gitignore +++ b/.gitignore @@ -199,6 +199,8 @@ ompi/mca/rte/orte/mpirun.1 ompi/mca/sharedfp/addproc/mca_sharedfp_addproc_control +ompi/mca/topo/treematch/config.h + ompi/mpi/c/profile/p*.c ompi/mpi/fortran/configure-fortran-output.h From e6e09c6cbac3a051e28865c206e1190ab241c202 Mon Sep 17 00:00:00 2001 From: Scott Miller Date: Thu, 30 May 2019 17:20:30 -0400 Subject: [PATCH 20/94] shmem/c: Fix shmem type for calls to shmem_test and shmem_wait_until with [u]int32_t and [u]int64_t Signed-off-by: Scott Miller (cherry picked from commit ca59cabc679ebdf1decdcf75f3da0766b35a34f7) --- oshmem/shmem/c/shmem_wait.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/oshmem/shmem/c/shmem_wait.c b/oshmem/shmem/c/shmem_wait.c index 1c94dd2c106..32d0f53c4ba 100644 --- a/oshmem/shmem/c/shmem_wait.c +++ b/oshmem/shmem/c/shmem_wait.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2013 Mellanox Technologies, Inc. * All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -117,10 +118,10 @@ SHMEM_TYPE_WAIT_UNTIL(_ushort, volatile unsigned short, SHMEM_SHORT, shmem) SHMEM_TYPE_WAIT_UNTIL(_uint, volatile unsigned int, SHMEM_INT, shmem) SHMEM_TYPE_WAIT_UNTIL(_ulong, volatile unsigned long, SHMEM_LONG, shmem) SHMEM_TYPE_WAIT_UNTIL(_ulonglong, volatile unsigned long long, SHMEM_LLONG, shmem) -SHMEM_TYPE_WAIT_UNTIL(_int32, volatile int32_t, SHMEM_LLONG, shmem) -SHMEM_TYPE_WAIT_UNTIL(_int64, volatile int64_t, SHMEM_LLONG, shmem) -SHMEM_TYPE_WAIT_UNTIL(_uint32, volatile uint32_t, SHMEM_LLONG, shmem) -SHMEM_TYPE_WAIT_UNTIL(_uint64, volatile uint64_t, SHMEM_LLONG, shmem) +SHMEM_TYPE_WAIT_UNTIL(_int32, volatile int32_t, SHMEM_INT32_T, shmem) +SHMEM_TYPE_WAIT_UNTIL(_int64, volatile int64_t, SHMEM_INT64_T, shmem) +SHMEM_TYPE_WAIT_UNTIL(_uint32, volatile uint32_t, SHMEM_INT32_T, shmem) +SHMEM_TYPE_WAIT_UNTIL(_uint64, volatile uint64_t, SHMEM_INT64_T, shmem) SHMEM_TYPE_WAIT_UNTIL(_size, volatile size_t, SHMEM_LLONG, shmem) SHMEM_TYPE_WAIT_UNTIL(_ptrdiff, volatile ptrdiff_t, SHMEM_LLONG, shmem) @@ -153,9 +154,9 @@ SHMEM_TYPE_TEST(_ushort, volatile unsigned short, SHMEM_SHORT, shmem) SHMEM_TYPE_TEST(_uint, volatile unsigned int, SHMEM_INT, shmem) SHMEM_TYPE_TEST(_ulong, volatile unsigned long, SHMEM_LONG, shmem) SHMEM_TYPE_TEST(_ulonglong, volatile unsigned long long, SHMEM_LLONG, shmem) -SHMEM_TYPE_TEST(_int32, volatile int32_t, SHMEM_LLONG, shmem) -SHMEM_TYPE_TEST(_int64, volatile int64_t, SHMEM_LLONG, shmem) -SHMEM_TYPE_TEST(_uint32, volatile uint32_t, SHMEM_LLONG, shmem) -SHMEM_TYPE_TEST(_uint64, volatile uint64_t, SHMEM_LLONG, shmem) +SHMEM_TYPE_TEST(_int32, volatile int32_t, SHMEM_INT32_T, shmem) +SHMEM_TYPE_TEST(_int64, volatile int64_t, SHMEM_INT64_T, shmem) +SHMEM_TYPE_TEST(_uint32, volatile uint32_t, SHMEM_INT32_T, shmem) +SHMEM_TYPE_TEST(_uint64, volatile uint64_t, SHMEM_INT64_T, shmem) SHMEM_TYPE_TEST(_size, volatile size_t, SHMEM_LLONG, shmem) SHMEM_TYPE_TEST(_ptrdiff, volatile 
ptrdiff_t, SHMEM_LLONG, shmem) From 5acaf006ae54db3c492812acc788a56ec1c0dc99 Mon Sep 17 00:00:00 2001 From: perrynzhou Date: Wed, 5 Jun 2019 14:51:57 +0900 Subject: [PATCH 21/94] regx/base: fix an integer overflow use strtol() instead of atoi() in order to handle hostnames containing a large number. This is a one-off commit for the release branches since the regx framework has already been removed from master. Refs. open-mpi/ompi#6729 Signed-off-by: perrynzhou --- orte/mca/regx/base/regx_base_default_fns.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/orte/mca/regx/base/regx_base_default_fns.c b/orte/mca/regx/base/regx_base_default_fns.c index 6b70f78cad0..4e1645d16b5 100644 --- a/orte/mca/regx/base/regx_base_default_fns.c +++ b/orte/mca/regx/base/regx_base_default_fns.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2018 Research Organization for Information Science + * Copyright (c) 2018-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -1056,7 +1056,7 @@ static int regex_parse_node_range(char *base, char *range, int num_digits, char for (found = false, i = 0; i < len; ++i) { if (isdigit((int) range[i])) { if (!found) { - start = atoi(range + i); + start = strtol(range + i, NULL, 10); found = true; break; } From 5dd8830dcabce1094ff40d8e4d54d8397bf0b935 Mon Sep 17 00:00:00 2001 From: Tsubasa Yanagibashi Date: Fri, 31 May 2019 13:45:02 +0900 Subject: [PATCH 22/94] mpiext/pcollreq: Add `_f08` to procedure names The procedure names don't contain "_f08" of Fortran 2008 bindings of Persistent Collective Operations(mpiext/pcollreq/use-mpi-f08). This fix adds "_f08" to the procedure names of pcollreq/use-mpi-f08, same as other Fortran 2008 routines in `ompi/mpi/fortran/use-mpi-f08/mod`. 
Signed-off-by: Tsubasa Yanagibashi (cherry picked from commit 3148b0cfaa04843e7219acb8c7e04f43f6d219fe) --- .../use-mpi-f08/mpiext_pcollreq_usempif08.h | 340 +++++++++--------- 1 file changed, 170 insertions(+), 170 deletions(-) diff --git a/ompi/mpiext/pcollreq/use-mpi-f08/mpiext_pcollreq_usempif08.h b/ompi/mpiext/pcollreq/use-mpi-f08/mpiext_pcollreq_usempif08.h index 9377c441262..33e3556cce7 100644 --- a/ompi/mpiext/pcollreq/use-mpi-f08/mpiext_pcollreq_usempif08.h +++ b/ompi/mpiext/pcollreq/use-mpi-f08/mpiext_pcollreq_usempif08.h @@ -22,9 +22,9 @@ ! replicated here. interface mpix_allgather_init - subroutine mpix_allgather_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) + subroutine mpix_allgather_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcount, recvtype, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -40,13 +40,13 @@ interface mpix_allgather_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_allgather_init + end subroutine mpix_allgather_init_f08 end interface mpix_allgather_init interface mpix_allgatherv_init - subroutine mpix_allgatherv_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, & - comm, info, request, ierror) + subroutine mpix_allgatherv_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcounts, displs, recvtype, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -63,13 +63,13 @@ interface mpix_allgatherv_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_allgatherv_init + end subroutine mpix_allgatherv_init_f08 end 
interface mpix_allgatherv_init interface mpix_allreduce_init - subroutine mpix_allreduce_init(sendbuf, recvbuf, count, & - datatype, op, & - comm, info, request, ierror) + subroutine mpix_allreduce_init_f08(sendbuf, recvbuf, count, & + datatype, op, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -86,13 +86,13 @@ interface mpix_allreduce_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_allreduce_init + end subroutine mpix_allreduce_init_f08 end interface mpix_allreduce_init interface mpix_alltoall_init - subroutine mpix_alltoall_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) + subroutine mpix_alltoall_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcount, recvtype, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -108,13 +108,13 @@ interface mpix_alltoall_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_alltoall_init + end subroutine mpix_alltoall_init_f08 end interface mpix_alltoall_init interface mpix_alltoallv_init - subroutine mpix_alltoallv_init(sendbuf, sendcounts, sdispls, sendtype, & - recvbuf, recvcounts, rdispls, recvtype, & - comm, info, request, ierror) + subroutine mpix_alltoallv_init_f08(sendbuf, sendcounts, sdispls, sendtype, & + recvbuf, recvcounts, rdispls, recvtype, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -130,13 +130,13 @@ interface mpix_alltoallv_init type(mpi_info), intent(in) :: info type(mpi_request), 
intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_alltoallv_init + end subroutine mpix_alltoallv_init_f08 end interface mpix_alltoallv_init interface mpix_alltoallw_init - subroutine mpix_alltoallw_init(sendbuf, sendcounts, sdispls, sendtypes, & - recvbuf, recvcounts, rdispls, recvtypes, & - comm, info, request, ierror) + subroutine mpix_alltoallw_init_f08(sendbuf, sendcounts, sdispls, sendtypes, & + recvbuf, recvcounts, rdispls, recvtypes, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -152,23 +152,23 @@ interface mpix_alltoallw_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_alltoallw_init + end subroutine mpix_alltoallw_init_f08 end interface mpix_alltoallw_init interface mpix_barrier_init - subroutine mpix_barrier_init(comm, info, request, ierror) + subroutine mpix_barrier_init_f08(comm, info, request, ierror) use :: mpi_f08_types, only : mpi_comm, mpi_info, mpi_request implicit none type(mpi_comm), intent(in) :: comm type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_barrier_init + end subroutine mpix_barrier_init_f08 end interface mpix_barrier_init interface mpix_bcast_init - subroutine mpix_bcast_init(buffer, count, datatype, root, & - comm, info, request, ierror) + subroutine mpix_bcast_init_f08(buffer, count, datatype, root, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: buffer @@ -183,13 +183,13 @@ interface mpix_bcast_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_bcast_init + end subroutine 
mpix_bcast_init_f08 end interface mpix_bcast_init interface mpix_exscan_init - subroutine mpix_exscan_init(sendbuf, recvbuf, count, & - datatype, op, & - comm, info, request, ierror) + subroutine mpix_exscan_init_f08(sendbuf, recvbuf, count, & + datatype, op, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -206,13 +206,13 @@ interface mpix_exscan_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_exscan_init + end subroutine mpix_exscan_init_f08 end interface mpix_exscan_init interface mpix_gather_init - subroutine mpix_gather_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) + subroutine mpix_gather_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcount, recvtype, root, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -228,13 +228,13 @@ interface mpix_gather_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_gather_init + end subroutine mpix_gather_init_f08 end interface mpix_gather_init interface mpix_gatherv_init - subroutine mpix_gatherv_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, root, & - comm, info, request, ierror) + subroutine mpix_gatherv_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcounts, displs, recvtype, root, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -251,13 +251,13 @@ interface mpix_gatherv_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request 
integer, optional, intent(out) :: ierror - end subroutine mpix_gatherv_init + end subroutine mpix_gatherv_init_f08 end interface mpix_gatherv_init interface mpix_reduce_init - subroutine mpix_reduce_init(sendbuf, recvbuf, count, & - datatype, op, root, & - comm, info, request, ierror) + subroutine mpix_reduce_init_f08(sendbuf, recvbuf, count, & + datatype, op, root, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -274,13 +274,13 @@ interface mpix_reduce_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_reduce_init + end subroutine mpix_reduce_init_f08 end interface mpix_reduce_init interface mpix_reduce_scatter_init - subroutine mpix_reduce_scatter_init(sendbuf, recvbuf, recvcounts, & - datatype, op, & - comm, info, request, ierror) + subroutine mpix_reduce_scatter_init_f08(sendbuf, recvbuf, recvcounts, & + datatype, op, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -297,13 +297,13 @@ interface mpix_reduce_scatter_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_reduce_scatter_init + end subroutine mpix_reduce_scatter_init_f08 end interface mpix_reduce_scatter_init interface mpix_reduce_scatter_block_init - subroutine mpix_reduce_scatter_block_init(sendbuf, recvbuf, recvcount, & - datatype, op, & - comm, info, request, ierror) + subroutine mpix_reduce_scatter_block_init_f08(sendbuf, recvbuf, recvcount, & + datatype, op, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -320,13 
+320,13 @@ interface mpix_reduce_scatter_block_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_reduce_scatter_block_init + end subroutine mpix_reduce_scatter_block_init_f08 end interface mpix_reduce_scatter_block_init interface mpix_scan_init - subroutine mpix_scan_init(sendbuf, recvbuf, count, & - datatype, op, & - comm, info, request, ierror) + subroutine mpix_scan_init_f08(sendbuf, recvbuf, count, & + datatype, op, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -343,13 +343,13 @@ interface mpix_scan_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_scan_init + end subroutine mpix_scan_init_f08 end interface mpix_scan_init interface mpix_scatter_init - subroutine mpix_scatter_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) + subroutine mpix_scatter_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcount, recvtype, root, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -365,13 +365,13 @@ interface mpix_scatter_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_scatter_init + end subroutine mpix_scatter_init_f08 end interface mpix_scatter_init interface mpix_scatterv_init - subroutine mpix_scatterv_init(sendbuf, sendcounts, displs, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) + subroutine mpix_scatterv_init_f08(sendbuf, sendcounts, displs, sendtype, & + recvbuf, recvcount, recvtype, root, & + comm, info, request, ierror) use 
:: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -388,13 +388,13 @@ interface mpix_scatterv_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_scatterv_init + end subroutine mpix_scatterv_init_f08 end interface mpix_scatterv_init interface mpix_neighbor_allgather_init - subroutine mpix_neighbor_allgather_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) + subroutine mpix_neighbor_allgather_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcount, recvtype, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -410,13 +410,13 @@ interface mpix_neighbor_allgather_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_neighbor_allgather_init + end subroutine mpix_neighbor_allgather_init_f08 end interface mpix_neighbor_allgather_init interface mpix_neighbor_allgatherv_init - subroutine mpix_neighbor_allgatherv_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, & - comm, info, request, ierror) + subroutine mpix_neighbor_allgatherv_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcounts, displs, recvtype, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -433,13 +433,13 @@ interface mpix_neighbor_allgatherv_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_neighbor_allgatherv_init + end subroutine mpix_neighbor_allgatherv_init_f08 end interface mpix_neighbor_allgatherv_init 
interface mpix_neighbor_alltoall_init - subroutine mpix_neighbor_alltoall_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) + subroutine mpix_neighbor_alltoall_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcount, recvtype, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -455,13 +455,13 @@ interface mpix_neighbor_alltoall_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_neighbor_alltoall_init + end subroutine mpix_neighbor_alltoall_init_f08 end interface mpix_neighbor_alltoall_init interface mpix_neighbor_alltoallv_init - subroutine mpix_neighbor_alltoallv_init(sendbuf, sendcounts, sdispls, sendtype, & - recvbuf, recvcounts, rdispls, recvtype, & - comm, info, request, ierror) + subroutine mpix_neighbor_alltoallv_init_f08(sendbuf, sendcounts, sdispls, sendtype, & + recvbuf, recvcounts, rdispls, recvtype, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -477,13 +477,13 @@ interface mpix_neighbor_alltoallv_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_neighbor_alltoallv_init + end subroutine mpix_neighbor_alltoallv_init_f08 end interface mpix_neighbor_alltoallv_init interface mpix_neighbor_alltoallw_init - subroutine mpix_neighbor_alltoallw_init(sendbuf, sendcounts, sdispls, sendtypes, & - recvbuf, recvcounts, rdispls, recvtypes, & - comm, info, request, ierror) + subroutine mpix_neighbor_alltoallw_init_f08(sendbuf, sendcounts, sdispls, sendtypes, & + recvbuf, recvcounts, rdispls, recvtypes, & + comm, info, request, ierror) use :: mpi_f08_types, only : 
mpi_address_kind, mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -500,13 +500,13 @@ interface mpix_neighbor_alltoallw_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine mpix_neighbor_alltoallw_init + end subroutine mpix_neighbor_alltoallw_init_f08 end interface mpix_neighbor_alltoallw_init interface pmpix_allgather_init - subroutine pmpix_allgather_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) + subroutine pmpix_allgather_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcount, recvtype, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -522,13 +522,13 @@ interface pmpix_allgather_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_allgather_init + end subroutine pmpix_allgather_init_f08 end interface pmpix_allgather_init interface pmpix_allgatherv_init - subroutine pmpix_allgatherv_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, & - comm, info, request, ierror) + subroutine pmpix_allgatherv_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcounts, displs, recvtype, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -545,13 +545,13 @@ interface pmpix_allgatherv_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_allgatherv_init + end subroutine pmpix_allgatherv_init_f08 end interface pmpix_allgatherv_init interface pmpix_allreduce_init - subroutine pmpix_allreduce_init(sendbuf, recvbuf, 
count, & - datatype, op, & - comm, info, request, ierror) + subroutine pmpix_allreduce_init_f08(sendbuf, recvbuf, count, & + datatype, op, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -568,13 +568,13 @@ interface pmpix_allreduce_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_allreduce_init + end subroutine pmpix_allreduce_init_f08 end interface pmpix_allreduce_init interface pmpix_alltoall_init - subroutine pmpix_alltoall_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) + subroutine pmpix_alltoall_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcount, recvtype, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -590,13 +590,13 @@ interface pmpix_alltoall_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_alltoall_init + end subroutine pmpix_alltoall_init_f08 end interface pmpix_alltoall_init interface pmpix_alltoallv_init - subroutine pmpix_alltoallv_init(sendbuf, sendcounts, sdispls, sendtype, & - recvbuf, recvcounts, rdispls, recvtype, & - comm, info, request, ierror) + subroutine pmpix_alltoallv_init_f08(sendbuf, sendcounts, sdispls, sendtype, & + recvbuf, recvcounts, rdispls, recvtype, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -612,13 +612,13 @@ interface pmpix_alltoallv_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine 
pmpix_alltoallv_init + end subroutine pmpix_alltoallv_init_f08 end interface pmpix_alltoallv_init interface pmpix_alltoallw_init - subroutine pmpix_alltoallw_init(sendbuf, sendcounts, sdispls, sendtypes, & - recvbuf, recvcounts, rdispls, recvtypes, & - comm, info, request, ierror) + subroutine pmpix_alltoallw_init_f08(sendbuf, sendcounts, sdispls, sendtypes, & + recvbuf, recvcounts, rdispls, recvtypes, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -634,23 +634,23 @@ interface pmpix_alltoallw_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_alltoallw_init + end subroutine pmpix_alltoallw_init_f08 end interface pmpix_alltoallw_init interface pmpix_barrier_init - subroutine pmpix_barrier_init(comm, info, request, ierror) + subroutine pmpix_barrier_init_f08(comm, info, request, ierror) use :: mpi_f08_types, only : mpi_comm, mpi_info, mpi_request implicit none type(mpi_comm), intent(in) :: comm type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_barrier_init + end subroutine pmpix_barrier_init_f08 end interface pmpix_barrier_init interface pmpix_bcast_init - subroutine pmpix_bcast_init(buffer, count, datatype, root, & - comm, info, request, ierror) + subroutine pmpix_bcast_init_f08(buffer, count, datatype, root, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: buffer @@ -665,13 +665,13 @@ interface pmpix_bcast_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_bcast_init + end subroutine pmpix_bcast_init_f08 end interface pmpix_bcast_init interface 
pmpix_exscan_init - subroutine pmpix_exscan_init(sendbuf, recvbuf, count, & - datatype, op, & - comm, info, request, ierror) + subroutine pmpix_exscan_init_f08(sendbuf, recvbuf, count, & + datatype, op, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -688,13 +688,13 @@ interface pmpix_exscan_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_exscan_init + end subroutine pmpix_exscan_init_f08 end interface pmpix_exscan_init interface pmpix_gather_init - subroutine pmpix_gather_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) + subroutine pmpix_gather_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcount, recvtype, root, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -710,13 +710,13 @@ interface pmpix_gather_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_gather_init + end subroutine pmpix_gather_init_f08 end interface pmpix_gather_init interface pmpix_gatherv_init - subroutine pmpix_gatherv_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, root, & - comm, info, request, ierror) + subroutine pmpix_gatherv_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcounts, displs, recvtype, root, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -733,13 +733,13 @@ interface pmpix_gatherv_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror 
- end subroutine pmpix_gatherv_init + end subroutine pmpix_gatherv_init_f08 end interface pmpix_gatherv_init interface pmpix_reduce_init - subroutine pmpix_reduce_init(sendbuf, recvbuf, count, & - datatype, op, root, & - comm, info, request, ierror) + subroutine pmpix_reduce_init_f08(sendbuf, recvbuf, count, & + datatype, op, root, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -756,13 +756,13 @@ interface pmpix_reduce_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_reduce_init + end subroutine pmpix_reduce_init_f08 end interface pmpix_reduce_init interface pmpix_reduce_scatter_init - subroutine pmpix_reduce_scatter_init(sendbuf, recvbuf, recvcounts, & - datatype, op, & - comm, info, request, ierror) + subroutine pmpix_reduce_scatter_init_f08(sendbuf, recvbuf, recvcounts, & + datatype, op, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -779,13 +779,13 @@ interface pmpix_reduce_scatter_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_reduce_scatter_init + end subroutine pmpix_reduce_scatter_init_f08 end interface pmpix_reduce_scatter_init interface pmpix_reduce_scatter_block_init - subroutine pmpix_reduce_scatter_block_init(sendbuf, recvbuf, recvcount, & - datatype, op, & - comm, info, request, ierror) + subroutine pmpix_reduce_scatter_block_init_f08(sendbuf, recvbuf, recvcount, & + datatype, op, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -802,13 +802,13 @@ interface 
pmpix_reduce_scatter_block_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_reduce_scatter_block_init + end subroutine pmpix_reduce_scatter_block_init_f08 end interface pmpix_reduce_scatter_block_init interface pmpix_scan_init - subroutine pmpix_scan_init(sendbuf, recvbuf, count, & - datatype, op, & - comm, info, request, ierror) + subroutine pmpix_scan_init_f08(sendbuf, recvbuf, count, & + datatype, op, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_op, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -825,13 +825,13 @@ interface pmpix_scan_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_scan_init + end subroutine pmpix_scan_init_f08 end interface pmpix_scan_init interface pmpix_scatter_init - subroutine pmpix_scatter_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) + subroutine pmpix_scatter_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcount, recvtype, root, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -847,13 +847,13 @@ interface pmpix_scatter_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_scatter_init + end subroutine pmpix_scatter_init_f08 end interface pmpix_scatter_init interface pmpix_scatterv_init - subroutine pmpix_scatterv_init(sendbuf, sendcounts, displs, sendtype, & - recvbuf, recvcount, recvtype, root, & - comm, info, request, ierror) + subroutine pmpix_scatterv_init_f08(sendbuf, sendcounts, displs, sendtype, & + recvbuf, recvcount, recvtype, root, & + comm, info, request, ierror) use 
:: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -870,13 +870,13 @@ interface pmpix_scatterv_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_scatterv_init + end subroutine pmpix_scatterv_init_f08 end interface pmpix_scatterv_init interface pmpix_neighbor_allgather_init - subroutine pmpix_neighbor_allgather_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) + subroutine pmpix_neighbor_allgather_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcount, recvtype, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -892,13 +892,13 @@ interface pmpix_neighbor_allgather_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_neighbor_allgather_init + end subroutine pmpix_neighbor_allgather_init_f08 end interface pmpix_neighbor_allgather_init interface pmpix_neighbor_allgatherv_init - subroutine pmpix_neighbor_allgatherv_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcounts, displs, recvtype, & - comm, info, request, ierror) + subroutine pmpix_neighbor_allgatherv_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcounts, displs, recvtype, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -915,13 +915,13 @@ interface pmpix_neighbor_allgatherv_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_neighbor_allgatherv_init + end subroutine pmpix_neighbor_allgatherv_init_f08 end interface 
pmpix_neighbor_allgatherv_init interface pmpix_neighbor_alltoall_init - subroutine pmpix_neighbor_alltoall_init(sendbuf, sendcount, sendtype, & - recvbuf, recvcount, recvtype, & - comm, info, request, ierror) + subroutine pmpix_neighbor_alltoall_init_f08(sendbuf, sendcount, sendtype, & + recvbuf, recvcount, recvtype, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -937,13 +937,13 @@ interface pmpix_neighbor_alltoall_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_neighbor_alltoall_init + end subroutine pmpix_neighbor_alltoall_init_f08 end interface pmpix_neighbor_alltoall_init interface pmpix_neighbor_alltoallv_init - subroutine pmpix_neighbor_alltoallv_init(sendbuf, sendcounts, sdispls, sendtype, & - recvbuf, recvcounts, rdispls, recvtype, & - comm, info, request, ierror) + subroutine pmpix_neighbor_alltoallv_init_f08(sendbuf, sendcounts, sdispls, sendtype, & + recvbuf, recvcounts, rdispls, recvtype, & + comm, info, request, ierror) use :: mpi_f08_types, only : mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -959,13 +959,13 @@ interface pmpix_neighbor_alltoallv_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_neighbor_alltoallv_init + end subroutine pmpix_neighbor_alltoallv_init_f08 end interface pmpix_neighbor_alltoallv_init interface pmpix_neighbor_alltoallw_init - subroutine pmpix_neighbor_alltoallw_init(sendbuf, sendcounts, sdispls, sendtypes, & - recvbuf, recvcounts, rdispls, recvtypes, & - comm, info, request, ierror) + subroutine pmpix_neighbor_alltoallw_init_f08(sendbuf, sendcounts, sdispls, sendtypes, & + recvbuf, recvcounts, rdispls, recvtypes, & + comm, info, request, 
ierror) use :: mpi_f08_types, only : mpi_address_kind, mpi_datatype, mpi_comm, mpi_info, mpi_request implicit none !DEC$ ATTRIBUTES NO_ARG_CHECK :: sendbuf, recvbuf @@ -982,5 +982,5 @@ interface pmpix_neighbor_alltoallw_init type(mpi_info), intent(in) :: info type(mpi_request), intent(out) :: request integer, optional, intent(out) :: ierror - end subroutine pmpix_neighbor_alltoallw_init + end subroutine pmpix_neighbor_alltoallw_init_f08 end interface pmpix_neighbor_alltoallw_init From 900f0fa21fd37f17c2592ba2f3303fae496c2aa4 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Fri, 10 May 2019 14:32:27 +0200 Subject: [PATCH 23/94] OSC rdma: make sure accumulating in shared memory is safe Signed-off-by: Joseph Schuchart (cherry picked from commit c67e2291937a09947c421dc84c6b3a8d07bec07f) --- ompi/mca/osc/rdma/osc_rdma.h | 3 +++ ompi/mca/osc/rdma/osc_rdma_accumulate.c | 13 +++++++++++-- ompi/mca/osc/rdma/osc_rdma_component.c | 3 ++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/ompi/mca/osc/rdma/osc_rdma.h b/ompi/mca/osc/rdma/osc_rdma.h index b3743f261ec..6aed111ddf0 100644 --- a/ompi/mca/osc/rdma/osc_rdma.h +++ b/ompi/mca/osc/rdma/osc_rdma.h @@ -145,6 +145,9 @@ struct ompi_osc_rdma_module_t { bool acc_use_amo; + /** whether the group is located on a single node */ + bool single_node; + /** flavor of this window */ int flavor; diff --git a/ompi/mca/osc/rdma/osc_rdma_accumulate.c b/ompi/mca/osc/rdma/osc_rdma_accumulate.c index 31c3fc29bef..7fa896e96c6 100644 --- a/ompi/mca/osc/rdma/osc_rdma_accumulate.c +++ b/ompi/mca/osc/rdma/osc_rdma_accumulate.c @@ -889,10 +889,19 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo (void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock)); } + /* accumulate in (shared) memory if there is only a single node + * OR if we have an exclusive lock + * OR if other processes won't try to use the network either */ + bool use_shared_mem = 
module->single_node || + (ompi_osc_rdma_peer_local_base (peer) && + (ompi_osc_rdma_peer_is_exclusive (peer) || + !module->acc_single_intrinsic)); + /* if the datatype is small enough (and the count is 1) then try to directly use the hardware to execute * the atomic operation. this should be safe in all cases as either 1) the user has assured us they will - * never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock */ - if (origin_extent <= 8 && 1 == origin_count && !ompi_osc_rdma_peer_local_base (peer)) { + * never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock. + * avoid using the NIC if the operation can be done directly in shared memory. */ + if (origin_extent <= 8 && 1 == origin_count && !use_shared_mem) { if (module->acc_use_amo && ompi_datatype_is_predefined (origin_datatype)) { if (NULL == result_addr) { ret = ompi_osc_rdma_acc_single_atomic (sync, origin_addr, origin_datatype, origin_extent, peer, target_address, diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index a39c83273a3..3dea0f94c1d 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -543,7 +543,8 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s local_size = ompi_comm_size (shared_comm); /* CPU atomics can be used if every process is on the same node or the NIC allows mixing CPU and NIC atomics */ - module->use_cpu_atomics = local_size == global_size || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB); + module->single_node = local_size == global_size; + module->use_cpu_atomics = module->single_node || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB); if (1 == local_size) { /* no point using a shared segment if there are no other processes on this node */ From b5428aaf719446a0c046113aaa1bf827e9eff867 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Wed, 15 
May 2019 20:17:29 -0700 Subject: [PATCH 24/94] btl/uct: add support for UCX 1.6.x This commit updates the uct btl to support the v1.6.x release of UCX. This release breaks API. Signed-off-by: Nathan Hjelm (cherry picked from commit b78066720c3e3299bd76f2e22d2c0e415db572fc) Signed-off-by: Geoffrey Paulsen --- opal/mca/btl/uct/btl_uct_endpoint.c | 34 ++++++++++++++++++++++++----- opal/mca/btl/uct/btl_uct_tl.c | 10 +++++++++ 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/opal/mca/btl/uct/btl_uct_endpoint.c b/opal/mca/btl/uct/btl_uct_endpoint.c index 40349673e27..ccdbd4511a2 100644 --- a/opal/mca/btl/uct/btl_uct_endpoint.c +++ b/opal/mca/btl/uct/btl_uct_endpoint.c @@ -4,6 +4,7 @@ * reserved. * Copyright (c) 2018 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2019 Google, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -101,6 +102,28 @@ static void mca_btl_uct_process_modex (mca_btl_uct_module_t *uct_btl, unsigned c } } +static inline ucs_status_t mca_btl_uct_ep_create_connected_compat (uct_iface_h iface, uct_device_addr_t *device_addr, + uct_iface_addr_t *iface_addr, uct_ep_h *uct_ep) +{ +#if UCT_API >= UCT_VERSION(1, 6) + uct_ep_params_t ep_params = {.field_mask = UCT_EP_PARAM_FIELD_IFACE | UCT_EP_PARAM_FIELD_DEV_ADDR | UCT_EP_PARAM_FIELD_IFACE_ADDR, + .iface = iface, .dev_addr = device_addr, .iface_addr = iface_addr}; + return uct_ep_create (&ep_params, uct_ep); +#else + return uct_ep_create_connected (iface, device_addr, iface_addr, uct_ep); +#endif +} + +static inline ucs_status_t mca_btl_uct_ep_create_compat (uct_iface_h iface, uct_ep_h *uct_ep) +{ +#if UCT_API >= UCT_VERSION(1, 6) + uct_ep_params_t ep_params = {.field_mask = UCT_EP_PARAM_FIELD_IFACE, .iface = iface}; + return uct_ep_create (&ep_params, uct_ep); +#else + return uct_ep_create (iface, uct_ep); +#endif +} + static int mca_btl_uct_endpoint_connect_iface (mca_btl_uct_module_t *uct_btl, mca_btl_uct_tl_t *tl, 
mca_btl_uct_device_context_t *tl_context, mca_btl_uct_tl_endpoint_t *tl_endpoint, uint8_t *tl_data) @@ -116,7 +139,7 @@ static int mca_btl_uct_endpoint_connect_iface (mca_btl_uct_module_t *uct_btl, mc BTL_VERBOSE(("connecting endpoint to interface")); mca_btl_uct_context_lock (tl_context); - ucs_status = uct_ep_create_connected (tl_context->uct_iface, device_addr, iface_addr, &tl_endpoint->uct_ep); + ucs_status = mca_btl_uct_ep_create_connected_compat (tl_context->uct_iface, device_addr, iface_addr, &tl_endpoint->uct_ep); tl_endpoint->flags = MCA_BTL_UCT_ENDPOINT_FLAG_CONN_READY; mca_btl_uct_context_unlock (tl_context); @@ -240,8 +263,8 @@ static int mca_btl_uct_endpoint_connect_endpoint (mca_btl_uct_module_t *uct_btl, /* create a temporary endpoint for setting up the rdma endpoint */ MCA_BTL_UCT_CONTEXT_SERIALIZE(conn_tl_context, { - ucs_status = uct_ep_create_connected (conn_tl_context->uct_iface, device_addr, iface_addr, - &conn_ep->uct_ep); + ucs_status = mca_btl_uct_ep_create_connected_compat (conn_tl_context->uct_iface, device_addr, iface_addr, + &conn_ep->uct_ep); }); if (UCS_OK != ucs_status) { BTL_VERBOSE(("could not create an endpoint for forming connection to remote peer. code = %d", @@ -263,7 +286,7 @@ static int mca_btl_uct_endpoint_connect_endpoint (mca_btl_uct_module_t *uct_btl, opal_process_name_print (endpoint->ep_proc->proc_name))); MCA_BTL_UCT_CONTEXT_SERIALIZE(tl_context, { - ucs_status = uct_ep_create (tl_context->uct_iface, &tl_endpoint->uct_ep); + ucs_status = mca_btl_uct_ep_create_compat (tl_context->uct_iface, &tl_endpoint->uct_ep); }); if (UCS_OK != ucs_status) { OBJ_RELEASE(endpoint->conn_ep); @@ -309,7 +332,8 @@ int mca_btl_uct_endpoint_connect (mca_btl_uct_module_t *uct_btl, mca_btl_uct_end void *ep_addr, int tl_index) { mca_btl_uct_tl_endpoint_t *tl_endpoint = endpoint->uct_eps[context_id] + tl_index; - mca_btl_uct_tl_t *tl = (tl_index == uct_btl->rdma_tl->tl_index) ? 
uct_btl->rdma_tl : uct_btl->am_tl; + mca_btl_uct_tl_t *tl = (uct_btl->rdma_tl && tl_index == uct_btl->rdma_tl->tl_index) ? + uct_btl->rdma_tl : uct_btl->am_tl; mca_btl_uct_device_context_t *tl_context = mca_btl_uct_module_get_tl_context_specific (uct_btl, tl, context_id); uint8_t *rdma_tl_data = NULL, *conn_tl_data = NULL, *am_tl_data = NULL, *tl_data; mca_btl_uct_connection_ep_t *conn_ep = NULL; diff --git a/opal/mca/btl/uct/btl_uct_tl.c b/opal/mca/btl/uct/btl_uct_tl.c index be70af6ec8b..a711a41ce99 100644 --- a/opal/mca/btl/uct/btl_uct_tl.c +++ b/opal/mca/btl/uct/btl_uct_tl.c @@ -6,6 +6,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2018 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2019 Google, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -270,10 +271,18 @@ static void mca_btl_uct_context_enable_progress (mca_btl_uct_device_context_t *c mca_btl_uct_device_context_t *mca_btl_uct_context_create (mca_btl_uct_module_t *module, mca_btl_uct_tl_t *tl, int context_id, bool enable_progress) { +#if UCT_API >= UCT_VERSION(1, 6) + uct_iface_params_t iface_params = {.field_mask = UCT_IFACE_PARAM_FIELD_OPEN_MODE | + UCT_IFACE_PARAM_FIELD_DEVICE, + .open_mode = UCT_IFACE_OPEN_MODE_DEVICE, + .mode = {.device = {.tl_name = tl->uct_tl_name, + .dev_name = tl->uct_dev_name}}}; +#else uct_iface_params_t iface_params = {.rndv_cb = NULL, .eager_cb = NULL, .stats_root = NULL, .rx_headroom = 0, .open_mode = UCT_IFACE_OPEN_MODE_DEVICE, .mode = {.device = {.tl_name = tl->uct_tl_name, .dev_name = tl->uct_dev_name}}}; +#endif mca_btl_uct_device_context_t *context; ucs_status_t ucs_status; int rc; @@ -610,6 +619,7 @@ int mca_btl_uct_query_tls (mca_btl_uct_module_t *module, mca_btl_uct_md_t *md, u /* no rdma tls */ BTL_VERBOSE(("no rdma tl matched supplied filter. 
disabling RDMA support")); + module->super.btl_flags &= ~MCA_BTL_FLAGS_RDMA; module->super.btl_put = NULL; module->super.btl_get = NULL; module->super.btl_atomic_fop = NULL; From adba7f55f78575d0f8f1fbb38b6d0b22e32b7335 Mon Sep 17 00:00:00 2001 From: Mikhail Brinskii Date: Thu, 6 Jun 2019 19:22:00 +0300 Subject: [PATCH 25/94] COLL/BASE: Fix linear sync all2all Signed-off-by: Mikhail Brinskii (cherry picked from commit 79006f4e5a578d32bfa08de7b98e747ae18706f6) --- ompi/mca/coll/base/coll_base_alltoall.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ompi/mca/coll/base/coll_base_alltoall.c b/ompi/mca/coll/base/coll_base_alltoall.c index 3509ed36414..3f1bdc5fb58 100644 --- a/ompi/mca/coll/base/coll_base_alltoall.c +++ b/ompi/mca/coll/base/coll_base_alltoall.c @@ -398,22 +398,22 @@ int ompi_coll_base_alltoall_intra_linear_sync(const void *sbuf, int scount, prcv = (char *) rbuf; psnd = (char *) sbuf; - /* Post first batch or ireceive and isend requests */ + /* Post first batch of irecv and isend requests */ for (nreqs = 0, nrreqs = 0, ri = (rank + 1) % size; nreqs < total_reqs; ri = (ri + 1) % size, ++nrreqs) { - nreqs++; error = MCA_PML_CALL(irecv (prcv + (ptrdiff_t)ri * rext, rcount, rdtype, ri, MCA_COLL_BASE_TAG_ALLTOALL, comm, &reqs[nreqs])); + nreqs++; if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; } } for (nsreqs = 0, si = (rank + size - 1) % size; nreqs < 2 * total_reqs; - si = (si + size - 1) % size, ++nsreqs) { - nreqs++; + si = (si + size - 1) % size, ++nsreqs) { error = MCA_PML_CALL(isend (psnd + (ptrdiff_t)si * sext, scount, sdtype, si, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, &reqs[nreqs])); + nreqs++; if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; } } From 05fa5845bc26b1aa7b214a172080c00095092e7a Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 18 Jun 2019 21:14:04 -0700 Subject: [PATCH 26/94] Fix finalize of flux component Per patches from @SteVwonder and @garlick 
Signed-off-by: Ralph Castain (cherry picked from commit d4070d5f58f0c65aef89eea5910b202b8402e48b) --- opal/mca/pmix/flux/pmix_flux.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/opal/mca/pmix/flux/pmix_flux.c b/opal/mca/pmix/flux/pmix_flux.c index 187108bcc7d..3233524e0fe 100644 --- a/opal/mca/pmix/flux/pmix_flux.c +++ b/opal/mca/pmix/flux/pmix_flux.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. @@ -373,6 +373,7 @@ static int flux_init(opal_list_t *ilist) char *str; if (0 < pmix_init_count) { + pmix_init_count++; return OPAL_SUCCESS; } @@ -585,11 +586,10 @@ static int flux_fini(void) { if (0 == --pmix_init_count) { PMI_Finalize (); + // teardown hash table + opal_pmix_base_hash_finalize(); } - // teardown hash table - opal_pmix_base_hash_finalize(); - return OPAL_SUCCESS; } From c5cf3432b987fe51a4b906f8c0edb0ac9ef59a90 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Wed, 5 Jun 2019 14:18:38 +0200 Subject: [PATCH 27/94] OSC rdma win allocate: synchronize error codes across shared memory group Signed-off-by: Joseph Schuchart (cherry picked from commit 8f27cc26d9845b5b207979b2a4621ef1089d1afb) --- ompi/mca/osc/rdma/osc_rdma_component.c | 91 +++++++++++++++----------- 1 file changed, 52 insertions(+), 39 deletions(-) diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index 3dea0f94c1d..d1e99b98dd1 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -524,6 +524,19 @@ struct _local_data { size_t size; }; +static int synchronize_errorcode(int errorcode, ompi_communicator_t *comm) +{ + int ret; + int err = errorcode; 
+ /* This assumes that error codes are negative integers */ + ret = comm->c_coll->coll_allreduce (MPI_IN_PLACE, &err, 1, MPI_INT, MPI_MIN, + comm, comm->c_coll->coll_allreduce_module); + if (OPAL_UNLIKELY (OMPI_SUCCESS != ret)) { + err = ret; + } + return err; +} + static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, size_t size) { ompi_communicator_t *shared_comm; @@ -595,21 +608,24 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s OMPI_PROC_MY_NAME->jobid, ompi_comm_get_cid(module->comm)); if (0 > ret) { ret = OMPI_ERR_OUT_OF_RESOURCE; - break; + } else { + /* allocate enough space for the state + data for all local ranks */ + ret = opal_shmem_segment_create (&module->seg_ds, data_file, total_size); + free (data_file); + if (OPAL_SUCCESS != ret) { + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to create shared memory segment"); + } } + } - /* allocate enough space for the state + data for all local ranks */ - ret = opal_shmem_segment_create (&module->seg_ds, data_file, total_size); - free (data_file); - if (OPAL_SUCCESS != ret) { - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to create shared memory segment"); - break; - } + ret = synchronize_errorcode(ret, shared_comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + break; } - ret = module->comm->c_coll->coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0, + ret = shared_comm->c_coll->coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0, shared_comm, shared_comm->c_coll->coll_bcast_module); - if (OMPI_SUCCESS != ret) { + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { break; } @@ -617,6 +633,10 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s if (NULL == module->segment_base) { OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to attach to the shared memory segment"); ret = OPAL_ERROR; + } + + ret = synchronize_errorcode(ret, shared_comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { break; } 
@@ -636,35 +656,28 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s memset (module->state, 0, module->state_size); if (0 == local_rank) { + /* unlink the shared memory backing file */ + opal_shmem_unlink (&module->seg_ds); /* just go ahead and register the whole segment */ ret = ompi_osc_rdma_register (module, MCA_BTL_ENDPOINT_ANY, module->segment_base, total_size, MCA_BTL_REG_FLAG_ACCESS_ANY, &module->state_handle); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - break; + if (OPAL_LIKELY(OMPI_SUCCESS == ret)) { + state_region->base = (intptr_t) module->segment_base; + if (module->state_handle) { + memcpy (state_region->btl_handle_data, module->state_handle, module->selected_btl->btl_registration_handle_size); + } } + } - state_region->base = (intptr_t) module->segment_base; - if (module->state_handle) { - memcpy (state_region->btl_handle_data, module->state_handle, module->selected_btl->btl_registration_handle_size); - } + /* synchronization to make sure memory is registered */ + ret = synchronize_errorcode(ret, shared_comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + break; } if (MPI_WIN_FLAVOR_CREATE == module->flavor) { ret = ompi_osc_rdma_initialize_region (module, base, size); - if (OMPI_SUCCESS != ret) { - break; - } - } - - /* barrier to make sure all ranks have attached */ - shared_comm->c_coll->coll_barrier(shared_comm, shared_comm->c_coll->coll_barrier_module); - - /* unlink the shared memory backing file */ - if (0 == local_rank) { - opal_shmem_unlink (&module->seg_ds); - } - - if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { + } else if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { ompi_osc_rdma_region_t *region = (ompi_osc_rdma_region_t *) module->state->regions; module->state->disp_unit = module->disp_unit; module->state->region_count = 1; @@ -675,8 +688,11 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s } } - /* barrier to make sure all ranks have set up their region data */ - 
shared_comm->c_coll->coll_barrier(shared_comm, shared_comm->c_coll->coll_barrier_module); + /* synchronization to make sure all ranks have set up their region data */ + ret = synchronize_errorcode(ret, shared_comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + break; + } offset = data_base; for (int i = 0 ; i < local_size ; ++i) { @@ -995,13 +1011,7 @@ static int ompi_osc_rdma_share_data (ompi_osc_rdma_module_t *module) free (temp); } while (0); - - ret = module->comm->c_coll->coll_allreduce (&ret, &global_result, 1, MPI_INT, MPI_MIN, module->comm, - module->comm->c_coll->coll_allreduce_module); - - if (OMPI_SUCCESS != ret) { - global_result = ret; - } + global_result = synchronize_errorcode(ret, module->comm); /* none of these communicators are needed anymore so free them now*/ if (MPI_COMM_NULL != module->local_leaders) { @@ -1236,6 +1246,9 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, /* fill in our part */ ret = allocate_state_shared (module, base, size); + + /* notify all others if something went wrong */ + ret = synchronize_errorcode(ret, module->comm); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_ERROR, "failed to allocate internal state"); ompi_osc_rdma_free (win); From 16e1d74c8fb9307882010fe3680f1ff603d31f1a Mon Sep 17 00:00:00 2001 From: Harald Klimach Date: Thu, 13 Jun 2019 15:49:04 +0200 Subject: [PATCH 28/94] Suggestion to fix division by zero in file view. In common_ompi_aggregators calc_cost routine: do not cast the real division to an int intermediately. This patch removes the obsolete int variable c and assigns the result of the P_a/P_x division directly to n_as. With the intermediate int c variable, n_as gets 0 if P_a < P_x, resulting in a division by 0 when computing n_s. 
Signed-off-by: Harald Klimach (cherry picked from commit e222a04ae57e5d09b8559f3c111de1f10a47246a) --- ompi/mca/common/ompio/common_ompio_aggregators.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ompi/mca/common/ompio/common_ompio_aggregators.c b/ompi/mca/common/ompio/common_ompio_aggregators.c index b1da09d77b5..5a570d8e005 100644 --- a/ompi/mca/common/ompio/common_ompio_aggregators.c +++ b/ompi/mca/common/ompio/common_ompio_aggregators.c @@ -1491,13 +1491,12 @@ static double cost_calc (int P, int P_a, size_t d_p, size_t b_c, int dim ) } case DIM2: { - int P_x, P_y, c; + int P_x, P_y; P_x = P_y = (int) sqrt(P); - c = (float) P_a / (float)P_x; + n_as = (float) P_a / (float)P_x; n_ar = (float) P_y; - n_as = (float) c; if ( d_p > (P_a*b_c/P )) { m_s = fmin(b_c / P_y, d_p); } From b35363957334c0b2011218e6019044c343247495 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 25 Jun 2019 13:27:17 -0700 Subject: [PATCH 29/94] Update to PMIx v3.1.3rc4 Will provide PR to update VERSION to final release once passes MTT Signed-off-by: Ralph Castain --- .gitignore | 18 + opal/mca/pmix/pmix3x/pmix/LICENSE | 2 + opal/mca/pmix/pmix3x/pmix/Makefile.am | 5 +- opal/mca/pmix/pmix3x/pmix/NEWS | 47 +- opal/mca/pmix/pmix3x/pmix/VERSION | 10 +- opal/mca/pmix/pmix3x/pmix/autogen.pl | 17 +- opal/mca/pmix/pmix3x/pmix/config/Makefile.am | 5 +- .../mca/pmix/pmix3x/pmix/config/distscript.sh | 8 +- opal/mca/pmix/pmix3x/pmix/config/pmix.m4 | 97 ++- .../pmix/config/pmix_config_pthreads.m4 | 353 +--------- .../pmix3x/pmix/config/pmix_config_threads.m4 | 5 +- .../pmix3x/pmix/config/pmix_search_libs.m4 | 37 +- .../pmix3x/pmix/config/pmix_setup_hwloc.m4 | 6 +- .../pmix3x/pmix/config/pmix_setup_libevent.m4 | 6 +- opal/mca/pmix/pmix3x/pmix/configure.ac | 3 + .../pmix3x/pmix/contrib/make_dist_tarball | 14 +- opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec | 43 +- .../pmix/pmix3x/pmix/include/pmix_common.h.in | 152 +++-- .../pmix/pmix3x/pmix/include/pmix_rename.h.in | 2 +- 
.../pmix3x/pmix/include/pmix_version.h.in | 4 +- .../pmix3x/pmix/src/atomics/sys/atomic_impl.h | 4 +- .../pmix3x/pmix/src/atomics/sys/atomic_stdc.h | 9 +- .../pmix/src/atomics/sys/powerpc/atomic.h | 26 +- .../pmix/src/event/pmix_event_notification.c | 23 +- .../pmix/src/event/pmix_event_registration.c | 8 +- .../pmix3x/pmix/src/include/pmix_globals.h | 5 +- .../pmix/src/mca/base/help-pmix-mca-base.txt | 13 +- .../base/pmix_mca_base_component_repository.c | 14 +- .../pmix/src/mca/bfrops/v12/Makefile.am | 5 +- .../pmix/src/mca/bfrops/v20/Makefile.am | 5 +- .../pmix/src/mca/bfrops/v21/Makefile.am | 5 +- .../pmix3x/pmix/src/mca/bfrops/v3/Makefile.am | 5 +- .../pmix/src/mca/gds/base/gds_base_fns.c | 5 +- .../pmix3x/pmix/src/mca/gds/ds12/Makefile.am | 5 +- .../pmix3x/pmix/src/mca/gds/ds21/Makefile.am | 5 +- .../src/mca/gds/ds21/gds_ds21_lock_pthread.c | 3 +- .../pmix3x/pmix/src/mca/gds/hash/Makefile.am | 5 +- .../pmix3x/pmix/src/mca/gds/hash/gds_hash.c | 209 ++++-- .../pmix/pmix3x/pmix/src/mca/pdl/configure.m4 | 8 +- .../pmix/src/mca/plog/default/Makefile.am | 5 +- .../pmix/src/mca/plog/stdfd/Makefile.am | 5 +- .../pmix/src/mca/plog/syslog/Makefile.am | 5 +- .../pmix/src/mca/pnet/base/pnet_base_fns.c | 3 +- .../pmix3x/pmix/src/mca/pnet/opa/Makefile.am | 5 +- .../pmix3x/pmix/src/mca/pnet/tcp/Makefile.am | 5 +- .../pmix3x/pmix/src/mca/pnet/test/Makefile.am | 5 +- .../pmix/src/mca/preg/native/Makefile.am | 5 +- .../pmix/src/mca/preg/native/preg_native.c | 4 +- .../src/mca/psec/dummy_handshake/Makefile.am | 59 ++ .../dummy_handshake/psec_dummy_handshake.c | 170 +++++ .../dummy_handshake/psec_dummy_handshake.h | 29 + .../psec_dummy_handshake_component.c | 73 ++ .../pmix/src/mca/psec/munge/Makefile.am | 5 +- .../pmix/src/mca/psec/native/Makefile.am | 5 +- .../pmix/src/mca/psec/native/psec_native.c | 22 +- .../pmix3x/pmix/src/mca/psec/none/Makefile.am | 5 +- opal/mca/pmix/pmix3x/pmix/src/mca/psec/psec.h | 33 +- .../pmix/src/mca/psensor/file/Makefile.am | 5 +- 
.../src/mca/psensor/heartbeat/Makefile.am | 5 +- .../pmix/src/mca/pshmem/mmap/Makefile.am | 4 + .../pmix/src/mca/ptl/base/ptl_base_sendrecv.c | 113 ++-- .../pmix3x/pmix/src/mca/ptl/tcp/Makefile.am | 5 +- .../pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c | 13 +- .../pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.h | 3 +- .../pmix/src/mca/ptl/tcp/ptl_tcp_component.c | 140 +++- .../pmix3x/pmix/src/mca/ptl/usock/Makefile.am | 5 +- .../src/mca/ptl/usock/ptl_usock_component.c | 40 +- .../pmix/src/runtime/pmix_progress_threads.c | 6 +- .../pmix/pmix3x/pmix/src/server/pmix_server.c | 114 +++- .../pmix3x/pmix/src/server/pmix_server_ops.c | 558 ++++++++++------ .../mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c | 626 +++++++++--------- opal/mca/pmix/pmix3x/pmix/src/util/output.c | 13 +- opal/mca/pmix/pmix3x/pmix/src/util/output.h | 13 +- opal/mca/pmix/pmix3x/pmix/test/Makefile.am | 56 +- .../pmix/pmix3x/pmix/test/run_tests00.pl.in | 73 ++ .../pmix/pmix3x/pmix/test/run_tests01.pl.in | 73 ++ .../pmix/pmix3x/pmix/test/run_tests02.pl.in | 73 ++ .../pmix/pmix3x/pmix/test/run_tests03.pl.in | 73 ++ .../pmix/pmix3x/pmix/test/run_tests04.pl.in | 73 ++ .../pmix/pmix3x/pmix/test/run_tests05.pl.in | 73 ++ .../pmix/pmix3x/pmix/test/run_tests06.pl.in | 73 ++ .../pmix/pmix3x/pmix/test/run_tests07.pl.in | 73 ++ .../pmix/pmix3x/pmix/test/run_tests08.pl.in | 73 ++ .../pmix/pmix3x/pmix/test/run_tests09.pl.in | 73 ++ .../pmix/pmix3x/pmix/test/run_tests10.pl.in | 73 ++ .../pmix/pmix3x/pmix/test/run_tests11.pl.in | 73 ++ .../pmix/pmix3x/pmix/test/run_tests12.pl.in | 73 ++ .../pmix/pmix3x/pmix/test/run_tests13.pl.in | 73 ++ .../pmix/pmix3x/pmix/test/run_tests14.pl.in | 73 ++ .../pmix/pmix3x/pmix/test/run_tests15.pl.in | 73 ++ opal/mca/pmix/pmix3x/pmix/test/test_common.h | 4 +- opal/mca/pmix/pmix3x/pmix/test/test_fence.c | 11 +- opal/mca/pmix/pmix3x/pmix/test/test_server.c | 85 +-- 93 files changed, 3333 insertions(+), 1293 deletions(-) create mode 100644 opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/Makefile.am 
create mode 100644 opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake.c create mode 100644 opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake.h create mode 100644 opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake_component.c create mode 100755 opal/mca/pmix/pmix3x/pmix/test/run_tests00.pl.in create mode 100755 opal/mca/pmix/pmix3x/pmix/test/run_tests01.pl.in create mode 100755 opal/mca/pmix/pmix3x/pmix/test/run_tests02.pl.in create mode 100755 opal/mca/pmix/pmix3x/pmix/test/run_tests03.pl.in create mode 100755 opal/mca/pmix/pmix3x/pmix/test/run_tests04.pl.in create mode 100755 opal/mca/pmix/pmix3x/pmix/test/run_tests05.pl.in create mode 100755 opal/mca/pmix/pmix3x/pmix/test/run_tests06.pl.in create mode 100755 opal/mca/pmix/pmix3x/pmix/test/run_tests07.pl.in create mode 100755 opal/mca/pmix/pmix3x/pmix/test/run_tests08.pl.in create mode 100755 opal/mca/pmix/pmix3x/pmix/test/run_tests09.pl.in create mode 100755 opal/mca/pmix/pmix3x/pmix/test/run_tests10.pl.in create mode 100755 opal/mca/pmix/pmix3x/pmix/test/run_tests11.pl.in create mode 100755 opal/mca/pmix/pmix3x/pmix/test/run_tests12.pl.in create mode 100755 opal/mca/pmix/pmix3x/pmix/test/run_tests13.pl.in create mode 100755 opal/mca/pmix/pmix3x/pmix/test/run_tests14.pl.in create mode 100755 opal/mca/pmix/pmix3x/pmix/test/run_tests15.pl.in diff --git a/.gitignore b/.gitignore index 50cddfeacb7..d9de74cae8f 100644 --- a/.gitignore +++ b/.gitignore @@ -401,6 +401,24 @@ opal/mca/pmix/ext3x/ext3x_client.c opal/mca/pmix/ext3x/ext3x_component.c opal/mca/pmix/ext3x/ext3x_server_north.c opal/mca/pmix/ext3x/ext3x_server_south.c +opal/mca/pmix/pmix3x/pmix/config/mca_library_paths.txt +opal/mca/pmix/pmix3x/pmix/config/test-driver +opal/mca/pmix/pmix3x/pmix/test/run_tests00.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests01.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests02.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests03.pl 
+opal/mca/pmix/pmix3x/pmix/test/run_tests04.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests05.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests06.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests07.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests08.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests09.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests10.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests11.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests12.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests13.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests14.pl +opal/mca/pmix/pmix3x/pmix/test/run_tests15.pl opal/tools/opal-checkpoint/opal-checkpoint opal/tools/opal-checkpoint/opal-checkpoint.1 diff --git a/opal/mca/pmix/pmix3x/pmix/LICENSE b/opal/mca/pmix/pmix3x/pmix/LICENSE index 3eb0a094e01..9f9a1b943ba 100644 --- a/opal/mca/pmix/pmix3x/pmix/LICENSE +++ b/opal/mca/pmix/pmix3x/pmix/LICENSE @@ -47,6 +47,8 @@ Copyright (c) 2012 The University of Wisconsin-La Crosse. All rights reserved. Copyright (c) 2013-2019 Intel, Inc. All rights reserved. Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. +Copyright (c) 2019 Amazon.com, Inc. or its affiliates. All Rights + reserved. $COPYRIGHT$ diff --git a/opal/mca/pmix/pmix3x/pmix/Makefile.am b/opal/mca/pmix/pmix3x/pmix/Makefile.am index 78fdedaafce..47e21332730 100644 --- a/opal/mca/pmix/pmix3x/pmix/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/Makefile.am @@ -11,7 +11,9 @@ # All rights reserved. # Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. +# Copyright (c) 2019 Amazon.com, Inc. or its affiliates. All Rights +# reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -25,6 +27,7 @@ ACLOCAL_AMFLAGS = -I ./config SUBDIRS = config contrib include src etc +AM_DISTCHECK_CONFIGURE_FLAGS = --disable-dlopen headers = sources = diff --git a/opal/mca/pmix/pmix3x/pmix/NEWS b/opal/mca/pmix/pmix3x/pmix/NEWS index abc79780523..dd478a9a87e 100644 --- a/opal/mca/pmix/pmix3x/pmix/NEWS +++ b/opal/mca/pmix/pmix3x/pmix/NEWS @@ -1,5 +1,5 @@ Copyright (c) 2015-2019 Intel, Inc. All rights reserved. -Copyright (c) 2017 IBM Corporation. All rights reserved. +Copyright (c) 2017-2019 IBM Corporation. All rights reserved. $COPYRIGHT$ Additional copyrights may follow @@ -21,6 +21,51 @@ example, a bug might be fixed in the master, and then moved to multiple release branches. +3.1.3 -- TBD +---------------------- +- PR #1096: Restore PMIX_NUM_SLOTS for backward compatibility +- PR #1106: Automatically generate PMIX_NUMERIC_VERSION +- PR #1143: Fix tool connection handshake for tools that are registered + clients +- PR #1163: Fix a compiler warning in atomics on POWER arch +- PR #1162: Fix race condition when clients fail while in a PMIx + collective operation +- PR #1166: Fix a regression in spinlock atomics +- PR #1159: Fix missing pointer update when shared memory segment + was re-attached +- PR #1180: Remove dependency on C++ compiler for thread detection +- PR #1180: Add detection for Flex when building in non-tarball situations +- PR #1165: Add dependency on libevent-devel to rpm spec file +- PR #1188: Link libpmix.so to MCA component libraries +- PR #1194: Ensure any cached notifications arrive after registration completes +- PR #1205: Add "make check" support +- PR #1209: Update configure logic for clock_gettime +- PR #1213/#1217/#1221: Add configure option "--enable-nonglobal-dlopen" + If the MCA component libraries should link back to libpmix.so +- PR #1231: SPEC: Allow splitting PMIx in pmix and pmix-libpmi packages +- PR #1222: Fix case of multiple launcher calls in job script +- PR #1237: 
Avoid double-free of collective tracker +- PR #1237: Ensure all participants are notified of fence complete +- PR #1237: Ensure all participants are notified of connect and disconnect complete +- PR #1250: Fix PMIx_server_finalize hang (rare) +- PR #1271: PTL/usock doesn't support tools +- PR #1280: Fix the PTL connection establishment protocol +- PR #1280: Fix tool connection in psec/handshake mode +- PR #1289: Avoid output_verbose overhead when it won't print +- PR #1296: Allow setup_fork to proceed even if gdds and pnet don't contribute +- PR #1296: Allow servers to pass NULL module +- PR #1297: Provide internal blocking ability to the register/deregister fns +- PR #1298: Add dummy handshake component to psec framework for testing +- PR #1303: Allow jobs to not specify proc-level info +- PR #1304: Provide proc data in cases where host does not +- PR #1305: Add some more values that can be computed +- PR #1308: Add missing tool rendezvous file +- PR #1309: Fix potential integer overflow in regex +- PR #1311: Work around memory bug in older gcc compilers +- PR #1321: Provide memory op hooks in user-facing macros +- PR #1329: Add -fPIC to static builds + + 3.1.2 -- 24 Jan 2019 ---------------------- - Fix a bug in macro identifying system events diff --git a/opal/mca/pmix/pmix3x/pmix/VERSION b/opal/mca/pmix/pmix3x/pmix/VERSION index a81e5274f87..98c143b2677 100644 --- a/opal/mca/pmix/pmix3x/pmix/VERSION +++ b/opal/mca/pmix/pmix3x/pmix/VERSION @@ -15,7 +15,7 @@ major=3 minor=1 -release=2 +release=3 # greek is used for alpha or beta release tags. If it is non-empty, # it will be appended to the version number. It does not have to be @@ -23,14 +23,14 @@ release=2 # The only requirement is that it must be entirely printable ASCII # characters and have no white space. 
-greek= +greek=rc4 # If repo_rev is empty, then the repository version number will be # obtained during "make dist" via the "git describe --tags --always" # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=git0f7075f7 +repo_rev=git5e6ec324 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Jan 24, 2019" +date="Jun 25, 2019" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library @@ -75,7 +75,7 @@ date="Jan 24, 2019" # Version numbers are described in the Libtool current:revision:age # format. -libpmix_so_version=4:22:2 +libpmix_so_version=4:23:2 libpmi_so_version=1:0:0 libpmi2_so_version=1:0:0 diff --git a/opal/mca/pmix/pmix3x/pmix/autogen.pl b/opal/mca/pmix/pmix3x/pmix/autogen.pl index 40b533d2142..9d365783c8e 100755 --- a/opal/mca/pmix/pmix3x/pmix/autogen.pl +++ b/opal/mca/pmix/pmix3x/pmix/autogen.pl @@ -4,7 +4,7 @@ # Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2015 IBM Corporation. All rights reserved. 
@@ -37,6 +37,9 @@ # Sanity check file my $topdir_file = "include/pmix.h"; my $dnl_line = "dnl ---------------------------------------------------------------------------"; +# The text file we'll write at the end that will contain +# all the mca component directory paths +my $mca_library_paths_file = "config/mca_library_paths.txt"; # Data structures to fill up with all the stuff we find my $mca_found; @@ -137,6 +140,9 @@ sub mca_process_component { push(@{$mca_found->{$framework}->{"components"}}, $found_component); + # save the directory for later to create the paths + # to all the component libraries + push(@subdirs, $cdir); } ############################################################################## @@ -723,6 +729,15 @@ sub in_tarball { print M4 $m4; close(M4); +# Remove the old library path file and write the new one +verbose "==> Writing txt file with all the mca component paths\n"; +unlink($mca_library_paths_file); +open(M4, ">$mca_library_paths_file") || + my_die "Cannot open $mca_library_paths_file"; +my $paths = join(":", @subdirs); +print M4 $paths; +close(M4); + # Run autoreconf verbose "==> Running autoreconf\n"; my $cmd = "autoreconf -ivf --warnings=all,no-obsolete,no-override -I config"; diff --git a/opal/mca/pmix/pmix3x/pmix/config/Makefile.am b/opal/mca/pmix/pmix3x/pmix/config/Makefile.am index 3793162404c..ebc3af9d96a 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/config/Makefile.am @@ -1,4 +1,4 @@ -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. 
@@ -44,7 +44,8 @@ EXTRA_DIST = \ pmix_setup_cc.m4 \ pmix_setup_zlib.m4 \ pmix_setup_libevent.m4 \ - pmix_mca_priority_sort.pl + pmix_mca_priority_sort.pl \ + mca_library_paths.txt maintainer-clean-local: diff --git a/opal/mca/pmix/pmix3x/pmix/config/distscript.sh b/opal/mca/pmix/pmix3x/pmix/config/distscript.sh index de41d2ba7b6..e5c948f15f1 100755 --- a/opal/mca/pmix/pmix3x/pmix/config/distscript.sh +++ b/opal/mca/pmix/pmix3x/pmix/config/distscript.sh @@ -11,11 +11,11 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2015 Research Organization for Information Science -# and Technology (RIST). All rights reserved. +# Copyright (c) 2015-2019 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # Copyright (c) 2015 Los Alamos National Security, LLC. All rights # reserved. -# Copyright (c) 2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -42,7 +42,7 @@ fi # Otherwise, use what configure told us, at the cost of allowing one # or two corner cases in (but otherwise VPATH builds won't work). repo_rev=$PMIX_REPO_REV -if test -d .git ; then +if test -e .git ; then repo_rev=$(config/pmix_get_version.sh VERSION --repo-rev) fi diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 index 5d1660649f5..a90e23b9795 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 @@ -18,8 +18,8 @@ dnl reserved. dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved. dnl Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. dnl Copyright (c) 2013-2019 Intel, Inc. All rights reserved. -dnl Copyright (c) 2015-2017 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. 
+dnl Copyright (c) 2015-2019 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. dnl Copyright (c) 2016 Mellanox Technologies, Inc. dnl All rights reserved. dnl @@ -120,9 +120,11 @@ AC_DEFUN([PMIX_SETUP_CORE],[ pmixmajor=${PMIX_MAJOR_VERSION}L pmixminor=${PMIX_MINOR_VERSION}L pmixrelease=${PMIX_RELEASE_VERSION}L + pmixnumeric=$(printf 0x%4.4x%2.2x%2.2x $PMIX_MAJOR_VERSION $PMIX_MINOR_VERSION $PMIX_RELEASE_VERSION) AC_SUBST(pmixmajor) AC_SUBST(pmixminor) AC_SUBST(pmixrelease) + AC_SUBST(pmixnumeric) AC_CONFIG_FILES(pmix_config_prefix[include/pmix_version.h]) PMIX_GREEK_VERSION="`$PMIX_top_srcdir/config/pmix_get_version.sh $PMIX_top_srcdir/VERSION --greek`" @@ -645,6 +647,11 @@ AC_DEFUN([PMIX_SETUP_CORE],[ pmix_show_title "Library and Function tests" + # Darwin doesn't need -lutil, as it's something other than this -lutil. + PMIX_SEARCH_LIBS_CORE([openpty], [util]) + + PMIX_SEARCH_LIBS_CORE([gethostbyname], [nsl]) + PMIX_SEARCH_LIBS_CORE([socket], [socket]) # IRIX and CentOS have dirname in -lgen, usually in libc @@ -653,6 +660,9 @@ AC_DEFUN([PMIX_SETUP_CORE],[ # Darwin doesn't need -lm, as it's a symlink to libSystem.dylib PMIX_SEARCH_LIBS_CORE([ceil], [m]) + # -lrt might be needed for clock_gettime + PMIX_SEARCH_LIBS_CORE([clock_gettime], [rt]) + AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep statfs statvfs getpeereid getpeerucred strnlen posix_fallocate tcgetpgrp]) # On some hosts, htonl is a define, so the AC_CHECK_FUNC will get @@ -715,8 +725,6 @@ AC_DEFUN([PMIX_SETUP_CORE],[ CFLAGS="$CFLAGS $THREAD_CFLAGS" CPPFLAGS="$CPPFLAGS $THREAD_CPPFLAGS" - CXXFLAGS="$CXXFLAGS $THREAD_CXXFLAGS" - CXXCPPFLAGS="$CXXCPPFLAGS $THREAD_CXXCPPFLAGS" LDFLAGS="$LDFLAGS $THREAD_LDFLAGS" LIBS="$LIBS $THREAD_LIBS" @@ -726,10 +734,10 @@ AC_DEFUN([PMIX_SETUP_CORE],[ AC_PROG_LN_S + # Check for some common system programs that we need AC_PROG_GREP AC_PROG_EGREP - ################################## # 
Visibility ################################## @@ -844,6 +852,32 @@ AC_DEFUN([PMIX_SETUP_CORE],[ AC_SUBST(pmixlibdir) AC_SUBST(pmixincludedir) + ############################################################################ + # setup "make check" + ############################################################################ + PMIX_BUILT_TEST_PREFIX=$PMIX_top_builddir + AC_SUBST(PMIX_BUILT_TEST_PREFIX) + # expose the mca component library paths in the build system + pathfile=$PMIX_top_srcdir/config/mca_library_paths.txt + PMIX_COMPONENT_LIBRARY_PATHS=`cat $pathfile` + AC_SUBST(PMIX_COMPONENT_LIBRARY_PATHS) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests00.pl], [chmod +x test/run_tests00.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests01.pl], [chmod +x test/run_tests01.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests02.pl], [chmod +x test/run_tests02.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests03.pl], [chmod +x test/run_tests03.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests04.pl], [chmod +x test/run_tests04.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests05.pl], [chmod +x test/run_tests05.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests06.pl], [chmod +x test/run_tests06.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests07.pl], [chmod +x test/run_tests07.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests08.pl], [chmod +x test/run_tests08.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests09.pl], [chmod +x test/run_tests09.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests10.pl], [chmod +x test/run_tests10.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests11.pl], [chmod +x test/run_tests11.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests12.pl], [chmod +x test/run_tests12.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests13.pl], [chmod +x test/run_tests13.pl]) + AC_CONFIG_FILES(pmix_config_prefix[test/run_tests14.pl], [chmod +x test/run_tests14.pl]) + 
AC_CONFIG_FILES(pmix_config_prefix[test/run_tests15.pl], [chmod +x test/run_tests15.pl]) + ############################################################################ # final output ############################################################################ @@ -883,6 +917,10 @@ AC_DEFUN([PMIX_DEFINE_ARGS],[ [Whether build should attempt to use dlopen (or similar) to dynamically load components. (default: enabled)])]) + AS_IF([test "$enable_dlopen" = "unknown"], + [AC_MSG_WARN([enable_dlopen variable has been overwritten by configure]) + AC_MSG_WARN([This is an internal error that should be reported to PMIx developers]) + AC_MSG_ERROR([Cannot continue])]) AS_IF([test "$enable_dlopen" = "no"], [enable_mca_dso="no" enable_mca_static="yes" @@ -898,7 +936,7 @@ AC_DEFUN([PMIX_DEFINE_ARGS],[ AC_ARG_ENABLE([embedded-mode], [AC_HELP_STRING([--enable-embedded-mode], [Using --enable-embedded-mode causes PMIx to skip a few configure checks and install nothing. It should only be used when building PMIx within the scope of a larger package.])]) - AS_IF([test ! -z "$enable_embedded_mode" && test "$enable_embedded_mode" = "yes"], + AS_IF([test "$enable_embedded_mode" = "yes"], [pmix_mode=embedded pmix_install_primary_headers=no AC_MSG_RESULT([yes])], @@ -910,8 +948,16 @@ AC_DEFUN([PMIX_DEFINE_ARGS],[ # Is this a developer copy? # -if test -d .git; then +if test -e $PMIX_TOP_SRCDIR/.git; then PMIX_DEVEL=1 + # check for Flex + AC_PROG_LEX + if test "x$LEX" != xflex; then + AC_MSG_WARN([PMIx requires Flex to build from non-tarball sources,]) + AC_MSG_WARN([but Flex was not found. 
Please install Flex into]) + AC_MSG_WARN([your path and try again]) + AC_MSG_ERROR([Cannot continue]) + fi else PMIX_DEVEL=0 fi @@ -962,7 +1008,6 @@ fi #################### Early development override #################### if test "$WANT_DEBUG" = "0"; then CFLAGS="-DNDEBUG $CFLAGS" - CXXFLAGS="-DNDEBUG $CXXFLAGS" fi AC_DEFINE_UNQUOTED(PMIX_ENABLE_DEBUG, $WANT_DEBUG, [Whether we want developer-level debugging code or not]) @@ -1133,6 +1178,41 @@ fi AM_CONDITIONAL([PMIX_INSTALL_BINARIES], [test $WANT_PMIX_BINARIES -eq 1]) + +# see if they want to disable non-RTLD_GLOBAL dlopen +AC_MSG_CHECKING([if want to support dlopen of non-global namespaces]) +AC_ARG_ENABLE([nonglobal-dlopen], + AC_HELP_STRING([--enable-nonglobal-dlopen], + [enable non-global dlopen (default: enabled)])) +if test "$enable_nonglobal_dlopen" == "no"; then + AC_MSG_RESULT([no]) + pmix_need_libpmix=0 +else + AC_MSG_RESULT([yes]) + pmix_need_libpmix=1 +fi + +# if someone enables embedded mode but doesn't want to install the +# devel headers, then default nonglobal-dlopen to false +AS_IF([test -z "$enable_nonglobal_dlopen" && test "x$pmix_mode" = "xembedded" && test $WANT_INSTALL_HEADERS -eq 0 && test $pmix_need_libpmix -eq 1], + [pmix_need_libpmix=0]) + +# +# psec/dummy_handshake +# + +AC_MSG_CHECKING([if want build psec/dummy_handshake]) +AC_ARG_ENABLE(dummy-handshake, + AC_HELP_STRING([--enable-dummy-handshake], + [Enables psec dummy component intended to check the PTL handshake scenario (default: disabled)])) +if test "$enable_dummy_handshake" != "yes"; then + AC_MSG_RESULT([no]) + eval "DISABLE_psec_dummy_handshake=1" +else + AC_MSG_RESULT([yes]) + eval "DISABLE_psec_dummy_handshake=0" +fi +AM_CONDITIONAL(MCA_BUILD_PSEC_DUMMY_HANDSHAKE, test "$DISABLE_psec_dummy_handshake" = "0") ])dnl # This must be a standalone routine so that it can be called both by @@ -1148,6 +1228,7 @@ AC_DEFUN([PMIX_DO_AM_CONDITIONALS],[ AM_CONDITIONAL([WANT_PRIMARY_HEADERS], [test "x$pmix_install_primary_headers" = "xyes"]) 
AM_CONDITIONAL(WANT_INSTALL_HEADERS, test "$WANT_INSTALL_HEADERS" = 1) AM_CONDITIONAL(WANT_PMI_BACKWARD, test "$WANT_PMI_BACKWARD" = 1) + AM_CONDITIONAL(NEED_LIBPMIX, [test "$pmix_need_libpmix" = "1"]) ]) pmix_did_am_conditionals=yes ])dnl diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_config_pthreads.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_config_pthreads.m4 index 2e2f1fd8f97..b23f66ebb01 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_config_pthreads.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_config_pthreads.m4 @@ -10,7 +10,7 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2014-2017 Intel, Inc. All rights reserved. +dnl Copyright (c) 2014-2019 Intel, Inc. All rights reserved. dnl Copyright (c) 2014-2016 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. 
dnl $COPYRIGHT$ @@ -71,104 +71,6 @@ int main(int argc, char* argv[]) # END: PMIX_INTL_PTHREAD_TRY_LINK ])dnl - -AC_DEFUN([PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN], [ -# BEGIN: PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN -# -# Make sure that we can run a small application in Fortran, with -# pthreads living in a C object file - -# Fortran module -cat > conftestf.f < conftest.c < -#include -#include -$pmix_conftest_h - -#ifdef __cplusplus -extern "C" { -#endif -int i = 3; -pthread_t me, newthread; - -void cleanup_routine(void *foo); -void *thread_main(void *foo); -void pthreadtest_f(void); - -void cleanup_routine(void *foo) { i = 4; } -void *thread_main(void *foo) { i = 2; return (void*) &i; } - -void pthreadtest_f(void) -{ - pthread_attr_t attr; - - me = pthread_self(); - pthread_atfork(NULL, NULL, NULL); - pthread_attr_init(&attr); - pthread_cleanup_push(cleanup_routine, 0); - pthread_create(&newthread, &attr, thread_main, 0); - pthread_join(newthread, 0); - pthread_cleanup_pop(0); -} - -void pthreadtest(void) -{ pthreadtest_f(); } - -void pthreadtest_(void) -{ pthreadtest_f(); } - -void pthreadtest__(void) -{ pthreadtest_f(); } - -void PTHREADTEST(void) -{ pthreadtest_f(); } - -#ifdef __cplusplus -} -#endif -EOF - -# Try the compile -PMIX_LOG_COMMAND( - [$CC $CFLAGS -I. 
-c conftest.c], - PMIX_LOG_COMMAND( - [$FC $FCFLAGS conftestf.f conftest.o -o conftest $LDFLAGS $LIBS], - [HAPPY=1], - [HAPPY=0]), - [HAPPY=0]) - -if test "$HAPPY" = "1"; then - $1 -else - PMIX_LOG_MSG([here is the C program:], 1) - PMIX_LOG_FILE([conftest.c]) - if test -f conftest.h; then - PMIX_LOG_MSG([here is contest.h:], 1) - PMIX_LOG_FILE([conftest.h]) - fi - PMIX_LOG_MSG([here is the fortran program:], 1) - PMIX_LOG_FILE([conftestf.f]) - $2 -fi - -unset HAPPY pmix_conftest_h -rm -rf conftest* -# END: PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN -])dnl - - # ******************************************************************** # # Try to compile thread support without any special flags @@ -194,48 +96,6 @@ fi ])dnl -AC_DEFUN([PMIX_INTL_POSIX_THREADS_PLAIN_CXX], [ -# -# C++ compiler -# -if test "$pmix_pthread_cxx_success" = "0"; then - AC_MSG_CHECKING([if C++ compiler and POSIX threads work as is]) - - AC_LANG_PUSH(C++) - PMIX_INTL_PTHREAD_TRY_LINK(pmix_pthread_cxx_success=1, - pmix_pthread_cxx_success=0) - AC_LANG_POP(C++) - if test "$pmix_pthread_cxx_success" = "1"; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi -fi -])dnl - - -AC_DEFUN([PMIX_INTL_POSIX_THREADS_PLAIN_FC], [ -# -# Fortran compiler -# -if test "$pmix_pthread_fortran_success" = "0" && \ - test "$OMPI_TRY_FORTRAN_BINDINGS" -gt "$OMPI_FORTRAN_NO_BINDINGS" && \ - test $ompi_fortran_happy -eq 1; then - AC_MSG_CHECKING([if Fortran compiler and POSIX threads work as is]) - - AC_LANG_PUSH(C) - PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN(pmix_pthread_fortran_success=1, - pmix_pthread_fortran_success=0) - AC_LANG_POP(C) - if test "$pmix_pthread_fortran_success" = "1"; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi -fi -])dnl - - AC_DEFUN([PMIX_INTL_POSIX_THREADS_PLAIN], [ # BEGIN: PMIX_INTL_POSIX_THREADS_PLAIN # @@ -246,19 +106,10 @@ AC_DEFUN([PMIX_INTL_POSIX_THREADS_PLAIN], [ # why take chances? 
# -# Only run C++ and Fortran if those compilers already configured AC_PROVIDE_IFELSE([AC_PROG_CC], [PMIX_INTL_POSIX_THREADS_PLAIN_C], [pmix_pthread_c_success=1]) -AC_PROVIDE_IFELSE([AC_PROG_CXX], - [PMIX_INTL_POSIX_THREADS_PLAIN_CXX], - [pmix_pthread_cxx_success=1]) - -AC_PROVIDE_IFELSE([AC_PROG_FC], - [PMIX_INTL_POSIX_THREADS_PLAIN_FC], - [pmix_pthread_fortran_success=1]) - # End: PMIX_INTL_POSIX_THREADS_PLAIN ])dnl @@ -294,60 +145,6 @@ fi ]) -AC_DEFUN([PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS_CXX], [ -# -# C++ compiler -# -if test "$pmix_pthread_cxx_success" = "0"; then - for pf in $pflags; do - AC_MSG_CHECKING([if C++ compiler and POSIX threads work with $pf]) - CXXFLAGS="$orig_CXXFLAGS $pf" - AC_LANG_PUSH(C++) - PMIX_INTL_PTHREAD_TRY_LINK(pmix_pthread_cxx_success=1, - pmix_pthread_cxx_success=0) - AC_LANG_POP(C++) - if test "$pmix_pthread_cxx_success" = "1"; then - PTHREAD_CXXFLAGS="$pf" - AC_MSG_RESULT([yes]) - break - else - PTHREAD_CXXFLAGS= - CXXFLAGS="$orig_CXXFLAGS" - AC_MSG_RESULT([no]) - fi - done -fi -]) - - -AC_DEFUN([PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS_FC], [ -# -# Fortran compiler -# -if test "$pmix_pthread_fortran_success" = "0" && \ - test "$OMPI_TRY_FORTRAN_BINDINGS" -gt "$OMPI_FORTRAN_NO_BINDINGS" && \ - test $ompi_fortran_happy -eq 1; then - for pf in $pflags; do - AC_MSG_CHECKING([if Fortran compiler and POSIX threads work with $pf]) - FCFLAGS="$orig_FCFLAGS $pf" - AC_LANG_PUSH(C) - PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN(pmix_pthread_fortran_success=1, - pmix_pthread_fortran_success=0) - AC_LANG_POP(C) - if test "$pmix_pthread_fortran_success" = "1"; then - PTHREAD_FCFLAGS="$pf" - AC_MSG_RESULT([yes]) - break - else - PTHREAD_FCFLAGS= - FCFLAGS="$orig_FCFLAGS" - AC_MSG_RESULT([no]) - fi - done -fi -]) - - AC_DEFUN([PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS],[ # Begin: PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS # @@ -374,19 +171,10 @@ case "${host_cpu}-${host_os}" in ;; esac -# Only run C++ and Fortran if those compilers already configured 
AC_PROVIDE_IFELSE([AC_PROG_CC], [PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS_C], [pmix_pthread_c_success=1]) -AC_PROVIDE_IFELSE([AC_PROG_CXX], - [PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS_CXX], - [pmix_pthread_cxx_success=1]) - -AC_PROVIDE_IFELSE([AC_PROG_FC], - [PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS_FC], - [pmix_pthread_fortran_success=1]) - # End: PMIX_INTL_POSIX_THREADS_SPECIAL_FLAGS ])dnl @@ -435,121 +223,6 @@ if test "$pmix_pthread_c_success" = "0"; then fi ])dnl - -AC_DEFUN([PMIX_INTL_POSIX_THREADS_LIBS_CXX],[ -# -# C++ compiler -# -if test "$pmix_pthread_cxx_success" = "0"; then - if test ! "$pmix_pthread_c_success" = "0" && test ! "$PTHREAD_LIBS" = "" ; then - AC_MSG_CHECKING([if C++ compiler and POSIX threads work with $PTHREAD_LIBS]) - case "${host_cpu}-${host-_os}" in - *-aix* | *-freebsd*) - if test "`echo $CXXCPPFLAGS | $GREP 'D_THREAD_SAFE'`" = ""; then - PTHREAD_CXXCPPFLAGS="-D_THREAD_SAFE" - CXXCPPFLAGS="$CXXCPPFLAGS $PTHREAD_CXXCPPFLAGS" - fi - ;; - *) - if test "`echo $CXXCPPFLAGS | $GREP 'D_REENTRANT'`" = ""; then - PTHREAD_CXXCPPFLAGS="-D_REENTRANT" - CXXCPPFLAGS="$CXXCPPFLAGS $PTHREAD_CXXCPPFLAGS" - fi - ;; - esac - LIBS="$orig_LIBS $PTHREAD_LIBS" - AC_LANG_PUSH(C++) - PMIX_INTL_PTHREAD_TRY_LINK(pmix_pthread_cxx_success=1, - pmix_pthread_cxx_success=0) - AC_LANG_POP(C++) - if test "$pmix_pthread_cxx_success" = "1"; then - AC_MSG_RESULT([yes]) - else - CXXCPPFLAGS="$orig_CXXCPPFLAGS" - LIBS="$orig_LIBS" - AC_MSG_RESULT([no]) - AC_MSG_ERROR([Can not find working threads configuration. 
aborting]) - fi - else - for pl in $plibs; do - AC_MSG_CHECKING([if C++ compiler and POSIX threads work with $pl]) - case "${host_cpu}-${host-_os}" in - *-aix* | *-freebsd*) - if test "`echo $CXXCPPFLAGS | $GREP 'D_THREAD_SAFE'`" = ""; then - PTHREAD_CXXCPPFLAGS="-D_THREAD_SAFE" - CXXCPPFLAGS="$CXXCPPFLAGS $PTHREAD_CXXCPPFLAGS" - fi - ;; - *) - if test "`echo $CXXCPPFLAGS | $GREP 'D_REENTRANT'`" = ""; then - PTHREAD_CXXCPPFLAGS="-D_REENTRANT" - CXXCPPFLAGS="$CXXCPPFLAGS $PTHREAD_CXXCPPFLAGS" - fi - ;; - esac - LIBS="$orig_LIBS $pl" - AC_LANG_PUSH(C++) - PMIX_INTL_PTHREAD_TRY_LINK(pmix_pthread_cxx_success=1, - pmix_pthread_cxx_success=0) - AC_LANG_POP(C++) - if test "$pmix_pthread_cxx_success" = "1"; then - PTHREAD_LIBS="$pl" - AC_MSG_RESULT([yes]) - else - PTHREAD_CXXCPPFLAGS= - CXXCPPFLAGS="$orig_CXXCPPFLAGS" - LIBS="$orig_LIBS" - AC_MSG_RESULT([no]) - fi - done - fi -fi -])dnl - - -AC_DEFUN([PMIX_INTL_POSIX_THREADS_LIBS_FC],[ -# -# Fortran compiler -# -if test "$pmix_pthread_fortran_success" = "0" && \ - test "$OMPI_TRY_FORTRAN_BINDINGS" -gt "$OMPI_FORTRAN_NO_BINDINGS" && \ - test $ompi_fortran_happy -eq 1; then - if test ! "$pmix_pthread_c_success" = "0" && test ! "$PTHREAD_LIBS" = "" ; then - AC_MSG_CHECKING([if Fortran compiler and POSIX threads work with $PTHREAD_LIBS]) - LIBS="$orig_LIBS $PTHREAD_LIBS" - AC_LANG_PUSH(C) - PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN(pmix_pthread_fortran_success=1, - pmix_pthread_fortran_success=0) - AC_LANG_POP(C) - if test "$pmix_pthread_fortran_success" = "1"; then - AC_MSG_RESULT([yes]) - else - LIBS="$orig_LIBS" - AC_MSG_RESULT([no]) - AC_MSG_ERROR([Can not find working threads configuration. 
aborting]) - fi - else - for pl in $plibs; do - AC_MSG_CHECKING([if Fortran compiler and POSIX threads work with $pl]) - LIBS="$orig_LIBS $pl" - AC_LANG_PUSH(C) - PMIX_INTL_PTHREAD_TRY_LINK_FORTRAN(pmix_pthread_fortran_success=1, - pmix_pthread_fortran_success=0) - AC_LANG_POP(C) - if test "$pmix_pthread_fortran_success" = "1"; then - PTHREAD_LIBS="$pl" - AC_MSG_RESULT([yes]) - break - else - LIBS="$orig_LIBS" - AC_MSG_RESULT([no]) - fi - done - fi -fi -])dnl - - AC_DEFUN([PMIX_INTL_POSIX_THREADS_LIBS],[ # Begin: PMIX_INTL_POSIX_THREADS_LIBS # @@ -563,19 +236,10 @@ AC_DEFUN([PMIX_INTL_POSIX_THREADS_LIBS],[ # libpthread: The usual place (like we can define usual!) plibs="-lpthreads -llthread -lpthread" -# Only run C++ and Fortran if those compilers already configured AC_PROVIDE_IFELSE([AC_PROG_CC], [PMIX_INTL_POSIX_THREADS_LIBS_C], [pmix_pthread_c_success=1]) -AC_PROVIDE_IFELSE([AC_PROG_CXX], - [PMIX_INTL_POSIX_THREADS_LIBS_CXX], - [pmix_pthread_cxx_success=1]) - -AC_PROVIDE_IFELSE([AC_PROG_FC], - [PMIX_INTL_POSIX_THREADS_LIBS_FC], - [pmix_pthread_fortran_success=1]) - # End: PMIX_INTL_POSIX_THREADS_LIBS] )dnl @@ -589,21 +253,14 @@ AC_DEFUN([PMIX_CONFIG_POSIX_THREADS],[ AC_REQUIRE([AC_PROG_GREP]) pmix_pthread_c_success=0 -pmix_pthread_cxx_success=0 orig_CFLAGS="$CFLAGS" -orig_FCFLAGS="$FCFLAGS" -orig_CXXFLAGS="$CXXFLAGS" orig_CPPFLAGS="$CPPFLAGS" -orig_CXXCPPFLAGS="$CXXCPPFLAGS" orig_LDFLAGS="$LDFLAGS" orig_LIBS="$LIBS" PTHREAD_CFLAGS= -PTHREAD_FCFLAGS= -PTHREAD_CXXFLAGS= PTHREAD_CPPFLAGS= -PTHREAD_CXXCPPFLAGS= PTHREAD_LDFLAGS= PTHREAD_LIBS= @@ -648,15 +305,11 @@ AC_DEFINE_UNQUOTED([PMIX_HAVE_PTHREAD_MUTEX_ERRORCHECK], [$defval], [If PTHREADS implementation supports PTHREAD_MUTEX_ERRORCHECK]) CFLAGS="$orig_CFLAGS" -FCFLAGS="$orig_FCFLAGS" -CXXFLAGS="$orig_CXXFLAGS" CPPFLAGS="$orig_CPPFLAGS" -CXXCPPFLAGS="$orig_CXXCPPFLAGS" LDFLAGS="$orig_LDFLAGS" LIBS="$orig_LIBS" -if test "$pmix_pthread_c_success" = "1" && \ - test "$pmix_pthread_cxx_success" = "1"; then +if test 
"$pmix_pthread_c_success" = "1"; then internal_useless=1 $1 else @@ -664,6 +317,6 @@ else $2 fi -unset pmix_pthread_c_success pmix_pthread_fortran_success pmix_pthread_cxx_success +unset pmix_pthread_c_success unset internal_useless ])dnl diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_config_threads.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_config_threads.m4 index 541e63f726c..050f8735577 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_config_threads.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_config_threads.m4 @@ -11,7 +11,7 @@ dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved. -dnl Copyright (c) 2014-2017 Intel, Inc. All rights reserved. +dnl Copyright (c) 2014-2019 Intel, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ @@ -56,10 +56,7 @@ if test "$HAVE_POSIX_THREADS" = "0"; then fi THREAD_CFLAGS="$PTHREAD_CFLAGS" -THREAD_FCFLAGS="$PTHREAD_FCFLAGS" -THREAD_CXXFLAGS="$PTHREAD_CXXFLAGS" THREAD_CPPFLAGS="$PTHREAD_CPPFLAGS" -THREAD_CXXCPPFLAGS="$PTHREAD_CXXCPPFLAGS" THREAD_LDFLAGS="$PTHREAD_LDFLAGS" THREAD_LIBS="$PTHREAD_LIBS" diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_search_libs.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_search_libs.m4 index 9d7a8af9a74..d900c8a93b4 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_search_libs.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_search_libs.m4 @@ -1,7 +1,7 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2014 Intel, Inc. All rights reserved. +dnl Copyright (c) 2014-2019 Intel, Inc. All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -21,7 +21,13 @@ dnl # PMIX_SEARCH_LIBS_COMPONENT. 
The reason why is because this macro # calls PMIX_WRAPPER_FLAGS_ADD -- see big comment in # pmix_setup_wrappers.m4 for an explanation of why this is bad). +# NOTE: PMIx doesn't have wrapper compilers, so this is not an issue +# here - we leave the note just for downstream compatibility AC_DEFUN([PMIX_SEARCH_LIBS_CORE],[ + + PMIX_VAR_SCOPE_PUSH([LIBS_save add]) + LIBS_save=$LIBS + AC_SEARCH_LIBS([$1], [$2], [pmix_have_$1=1 $3], @@ -31,4 +37,33 @@ AC_DEFUN([PMIX_SEARCH_LIBS_CORE],[ AC_DEFINE_UNQUOTED([PMIX_HAVE_]m4_toupper($1), [$pmix_have_$1], [whether $1 is found and available]) + PMIX_VAR_SCOPE_POP +])dnl + +# PMIX SEARCH_LIBS_COMPONENT(prefix, func, list-of-libraries, +# action-if-found, action-if-not-found, +# other-libraries) +# +# Same as PMIX SEARCH_LIBS_CORE, above, except that we don't call PMIX +# WRAPPER_FLAGS_ADD. Instead, we add it to the ${prefix}_LIBS +# variable (i.e., $prefix is usually "framework_component", such as +# "fbtl_posix"). +AC_DEFUN([PMIX_SEARCH_LIBS_COMPONENT],[ + + PMIX_VAR_SCOPE_PUSH([LIBS_save add]) + LIBS_save=$LIBS + + AC_SEARCH_LIBS([$2], [$3], + [ # Found it! 
See if anything was added to LIBS + add=`printf '%s\n' "$LIBS" | sed -e "s/$LIBS_save$//"` + AS_IF([test -n "$add"], + [PMIX_FLAGS_APPEND_UNIQ($1_LIBS, [$add])]) + $1_have_$2=1 + $4], + [$1_have_$2=0 + $5], [$6]) + + AC_DEFINE_UNQUOTED([PMIX_HAVE_]m4_toupper($1), [$$1_have_$2], + [whether $1 is found and available]) + PMIX_VAR_SCOPE_POP ])dnl diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_hwloc.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_hwloc.m4 index 8f6ed75176d..a17313259a2 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_hwloc.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_hwloc.m4 @@ -17,11 +17,7 @@ AC_DEFUN([PMIX_HWLOC_CONFIG],[ [AC_HELP_STRING([--with-hwloc-header=HEADER], [The value that should be included in C files to include hwloc.h])]) - AC_ARG_ENABLE([embedded-hwloc], - [AC_HELP_STRING([--enable-embedded-hwloc], - [Enable use of locally embedded hwloc])]) - - AS_IF([test "$enable_embedded_hwloc" = "yes"], + AS_IF([test "$pmix_mode" = "embedded"], [_PMIX_HWLOC_EMBEDDED_MODE], [_PMIX_HWLOC_EXTERNAL]) diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libevent.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libevent.m4 index 2348a87aacb..949af1cfd58 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libevent.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libevent.m4 @@ -19,11 +19,7 @@ AC_DEFUN([PMIX_LIBEVENT_CONFIG],[ [AC_HELP_STRING([--with-libevent-header=HEADER], [The value that should be included in C files to include event.h])]) - AC_ARG_ENABLE([embedded-libevent], - [AC_HELP_STRING([--enable-embedded-libevent], - [Enable use of locally embedded libevent])]) - - AS_IF([test "$enable_embedded_libevent" = "yes"], + AS_IF([test "$pmix_mode" = "embedded"], [_PMIX_LIBEVENT_EMBEDDED_MODE], [_PMIX_LIBEVENT_EXTERNAL]) diff --git a/opal/mca/pmix/pmix3x/pmix/configure.ac b/opal/mca/pmix/pmix3x/pmix/configure.ac index 08f6981b8d5..dcea50ac001 100644 --- a/opal/mca/pmix/pmix3x/pmix/configure.ac +++ 
b/opal/mca/pmix/pmix3x/pmix/configure.ac @@ -157,6 +157,9 @@ LT_PREREQ([2.2.6]) pmix_enable_shared="$enable_shared" pmix_enable_static="$enable_static" +AS_IF([test ! -z "$enable_static" && test "$enable_static" == "yes"], + [CFLAGS="$CFLAGS -fPIC"]) + AM_ENABLE_SHARED AM_DISABLE_STATIC diff --git a/opal/mca/pmix/pmix3x/pmix/contrib/make_dist_tarball b/opal/mca/pmix/pmix3x/pmix/contrib/make_dist_tarball index c9a6d19c646..f3fc22b5e49 100755 --- a/opal/mca/pmix/pmix3x/pmix/contrib/make_dist_tarball +++ b/opal/mca/pmix/pmix3x/pmix/contrib/make_dist_tarball @@ -10,8 +10,10 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2015-2019 Intel, Inc. All rights reserved. +# Copyright (c) 2019 Amazon.com, Inc. or its affiliates. All Rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -108,14 +110,6 @@ if test "$LIBEVENT" != ""; then config_args="--with-libevent=$LIBEVENT $config_args" fi -# if config_args isn't empty, then add that to the distcheck_flags -# (because we'll assumedly need those to run configure under "make -# distcheck"). -if test "$config_args" != ""; then - echo "*** Adding to distcheck_flags: $config_args" - distcheck_flags="$distcheck_flags AM_DISTCHECK_CONFIGURE_FLAGS=\"$config_args\"" -fi - export DISTCHECK_CONFIGURE_FLAGS=$config_args # diff --git a/opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec b/opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec index ba7ec977ba5..44b3810a926 100644 --- a/opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec +++ b/opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec @@ -12,7 +12,7 @@ # Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. 
-# Copyright (c) 2015-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2019 Intel, Inc. All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ @@ -192,7 +192,7 @@ Summary: An extended/exascale implementation of PMI Name: %{?_name:%{_name}}%{!?_name:pmix} -Version: 3.1.2 +Version: 3.1.3rc4 Release: 1%{?dist} License: BSD Group: Development/Libraries @@ -204,6 +204,7 @@ Prefix: %{_prefix} Provides: pmix Provides: pmix = %{version} BuildRoot: /var/tmp/%{name}-%{version}-%{release}-root +BuildRequires: libevent-devel %if %{disable_auto_requires} AutoReq: no %endif @@ -230,6 +231,22 @@ scalability. This RPM contains all the tools necessary to compile and link against PMIx. +# if build_all_in_one_rpm = 0, build split packages +%if !%{build_all_in_one_rpm} +%package libpmi +Summary: PMI-1 and PMI-2 compatibility libraries +Requires: %{name}%{?_isa} = %{version}-%{release} +Conflicts: slurm-libpmi + +%description libpmi +The %{name}-libpmi package contains libpmi and libpmi2 libraries that provide +the respective APIs and a copy of the PMIx library – each API is translated +into its PMIx equivalent. This is especially targeted at apps/libs that are +hardcoded to dlopen “libpmi” or “libpmi2”. +This package conflicts with slurm-libpmi, which provides its own, incompatible +versions of libpmi.so and libpmi2.so. +%endif + ############################################################################# # # Prepatory Section @@ -346,6 +363,10 @@ export CFLAGS CXXFLAGS FCFLAGS # We don't need that in an RPM. find $RPM_BUILD_ROOT -name config.log -exec rm -f {} \; +# If we build separate RPMs, then move the libpmi.* and libpmi2.* compat libs +# out of the way +find $RPM_BUILD_ROOT -name 'libpmi.'
| xargs rm -f + # First, the [optional] modulefile %if %{install_modulefile} @@ -490,6 +511,19 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT %endif %doc README INSTALL LICENSE +# if building separate RPMs, split the compatibility libs +%if !%{build_all_in_one_rpm} +%exclude %{_libdir}/libpmi.* +%exclude %{_libdir}/libpmi2.* +%exclude %{_includedir}/pmi.* +%exclude %{_includedir}/pmi2.* + +%files libpmi +%{_libdir}/libpmi.* +%{_libdir}/libpmi2.* +%{_includedir}/pmi.* +%{_includedir}/pmi2.* +%endif ############################################################################# # @@ -497,6 +531,11 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT # ############################################################################# %changelog +* Tue Apr 30 2019 Kilian Cavalotti +- Enable multiple RPMs build to allow backward compatibility PMI-1 and PMI-2 + libs to be built separate. "rpmbuild --define 'build_all_in_one_rpm 0' ..." + will build separate pmix and pmix-libpmi RPMs. 
+ * Tue Oct 17 2017 Ralph Castain - Add PMIx bin directory diff --git a/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in b/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in index 35aecfabbc2..a3039ff6748 100644 --- a/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in +++ b/opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in @@ -272,6 +272,7 @@ typedef uint32_t pmix_rank_t; #define PMIX_LOCAL_SIZE "pmix.local.size" // (uint32_t) #procs in this job on this node #define PMIX_NODE_SIZE "pmix.node.size" // (uint32_t) #procs across all jobs on this node #define PMIX_MAX_PROCS "pmix.max.size" // (uint32_t) max #procs for this job +#define PMIX_NUM_SLOTS "pmix.num.slots" // (uint32_t) #slots allocated #define PMIX_NUM_NODES "pmix.num.nodes" // (uint32_t) #nodes in this nspace @@ -989,6 +990,23 @@ typedef uint16_t pmix_iof_channel_t; #define PMIX_FWD_STDDIAG_CHANNEL 0x0008 #define PMIX_FWD_ALL_CHANNELS 0x00ff +/* define some "hooks" external libraries can use to + * intercept memory allocation/release operations */ +static inline void* pmix_malloc(size_t n) +{ + return malloc(n); +} + +static inline void pmix_free(void *m) +{ + free(m); +} + +static inline void* pmix_calloc(size_t n, size_t m) +{ + return calloc(n, m); +} + /* declare a convenience macro for checking keys */ #define PMIX_CHECK_KEY(a, b) \ (0 == strncmp((a)->key, (b), PMIX_MAX_KEYLEN)) @@ -1030,7 +1048,7 @@ typedef struct pmix_byte_object { #define PMIX_BYTE_OBJECT_CREATE(m, n) \ do { \ - (m) = (pmix_byte_object_t*)malloc((n) * sizeof(pmix_byte_object_t)); \ + (m) = (pmix_byte_object_t*)pmix_malloc((n) * sizeof(pmix_byte_object_t)); \ if (NULL != (m)) { \ memset((m), 0, (n)*sizeof(pmix_byte_object_t)); \ } \ @@ -1045,7 +1063,7 @@ typedef struct pmix_byte_object { #define PMIX_BYTE_OBJECT_DESTRUCT(m) \ do { \ if (NULL != (m)->bytes) { \ - free((m)->bytes); \ + pmix_free((m)->bytes); \ } \ } while(0) @@ -1055,10 +1073,10 @@ typedef struct pmix_byte_object { if (NULL != (m)) { \ for (_bon=0; _bon < n; _bon++) 
{ \ if (NULL != (m)[_bon].bytes) { \ - free((m)[_bon].bytes); \ + pmix_free((m)[_bon].bytes); \ } \ } \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } \ } while(0) @@ -1089,7 +1107,7 @@ typedef struct { #define PMIX_ENVAR_CREATE(m, n) \ do { \ - (m) = (pmix_envar_t*)calloc((n) , sizeof(pmix_envar_t)); \ + (m) = (pmix_envar_t*)pmix_calloc((n) , sizeof(pmix_envar_t)); \ } while (0) #define PMIX_ENVAR_FREE(m, n) \ do { \ @@ -1098,7 +1116,7 @@ typedef struct { for (_ek=0; _ek < (n); _ek++) { \ PMIX_ENVAR_DESTRUCT(&(m)[_ek]); \ } \ - free((m)); \ + pmix_free((m)); \ } \ } while (0) #define PMIX_ENVAR_CONSTRUCT(m) \ @@ -1110,11 +1128,11 @@ typedef struct { #define PMIX_ENVAR_DESTRUCT(m) \ do { \ if (NULL != (m)->envar) { \ - free((m)->envar); \ + pmix_free((m)->envar); \ (m)->envar = NULL; \ } \ if (NULL != (m)->value) { \ - free((m)->value); \ + pmix_free((m)->value); \ (m)->value = NULL; \ } \ } while(0) @@ -1148,14 +1166,14 @@ typedef struct pmix_data_buffer { } pmix_data_buffer_t; #define PMIX_DATA_BUFFER_CREATE(m) \ do { \ - (m) = (pmix_data_buffer_t*)calloc(1, sizeof(pmix_data_buffer_t)); \ + (m) = (pmix_data_buffer_t*)pmix_calloc(1, sizeof(pmix_data_buffer_t)); \ } while (0) #define PMIX_DATA_BUFFER_RELEASE(m) \ do { \ if (NULL != (m)->base_ptr) { \ - free((m)->base_ptr); \ + pmix_free((m)->base_ptr); \ } \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } while (0) #define PMIX_DATA_BUFFER_CONSTRUCT(m) \ @@ -1163,7 +1181,7 @@ typedef struct pmix_data_buffer { #define PMIX_DATA_BUFFER_DESTRUCT(m) \ do { \ if (NULL != (m)->base_ptr) { \ - free((m)->base_ptr); \ + pmix_free((m)->base_ptr); \ (m)->base_ptr = NULL; \ } \ (m)->pack_ptr = NULL; \ @@ -1194,12 +1212,12 @@ typedef struct pmix_proc { } pmix_proc_t; #define PMIX_PROC_CREATE(m, n) \ do { \ - (m) = (pmix_proc_t*)calloc((n) , sizeof(pmix_proc_t)); \ + (m) = (pmix_proc_t*)pmix_calloc((n) , sizeof(pmix_proc_t)); \ } while (0) #define PMIX_PROC_RELEASE(m) \ do { \ - free((m)); \ + pmix_free((m)); \ (m) = 
NULL; \ } while (0) @@ -1213,7 +1231,7 @@ typedef struct pmix_proc { #define PMIX_PROC_FREE(m, n) \ do { \ if (NULL != (m)) { \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } \ } while (0) @@ -1263,7 +1281,7 @@ typedef struct pmix_proc_info { } pmix_proc_info_t; #define PMIX_PROC_INFO_CREATE(m, n) \ do { \ - (m) = (pmix_proc_info_t*)calloc((n) , sizeof(pmix_proc_info_t)); \ + (m) = (pmix_proc_info_t*)pmix_calloc((n) , sizeof(pmix_proc_info_t)); \ } while (0) #define PMIX_PROC_INFO_RELEASE(m) \ @@ -1279,11 +1297,11 @@ typedef struct pmix_proc_info { #define PMIX_PROC_INFO_DESTRUCT(m) \ do { \ if (NULL != (m)->hostname) { \ - free((m)->hostname); \ + pmix_free((m)->hostname); \ (m)->hostname = NULL; \ } \ if (NULL != (m)->executable_name) { \ - free((m)->executable_name); \ + pmix_free((m)->executable_name); \ (m)->executable_name = NULL; \ } \ } while(0) @@ -1295,7 +1313,7 @@ typedef struct pmix_proc_info { for (_k=0; _k < (n); _k++) { \ PMIX_PROC_INFO_DESTRUCT(&(m)[_k]); \ } \ - free((m)); \ + pmix_free((m)); \ } \ } while (0) @@ -1362,7 +1380,7 @@ typedef struct pmix_value { do { \ int _ii; \ pmix_value_t *_v; \ - (m) = (pmix_value_t*)calloc((n), sizeof(pmix_value_t)); \ + (m) = (pmix_value_t*)pmix_calloc((n), sizeof(pmix_value_t)); \ _v = (pmix_value_t*)(m); \ if (NULL != (m)) { \ for (_ii=0; _ii < (int)(n); _ii++) { \ @@ -1375,7 +1393,7 @@ typedef struct pmix_value { #define PMIX_VALUE_RELEASE(m) \ do { \ PMIX_VALUE_DESTRUCT((m)); \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } while (0) @@ -1396,7 +1414,7 @@ typedef struct pmix_value { for (_vv=0; _vv < (n); _vv++) { \ PMIX_VALUE_DESTRUCT(&((m)[_vv])); \ } \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } \ } while (0) @@ -1410,7 +1428,7 @@ typedef struct pmix_value { #define PMIX_VALUE_XFER(r, v, s) \ do { \ if (NULL == (v)) { \ - (v) = (pmix_value_t*)malloc(sizeof(pmix_value_t)); \ + (v) = (pmix_value_t*)pmix_malloc(sizeof(pmix_value_t)); \ if (NULL == (v)) { \ (r) = PMIX_ERR_NOMEM; \ } else { \ 
@@ -1488,9 +1506,11 @@ typedef struct pmix_info { #define PMIX_INFO_CREATE(m, n) \ do { \ pmix_info_t *_i; \ - (m) = (pmix_info_t*)calloc((n), sizeof(pmix_info_t)); \ - _i = (pmix_info_t*)(m); \ - _i[(n)-1].flags = PMIX_INFO_ARRAY_END; \ + (m) = (pmix_info_t*)pmix_calloc((n), sizeof(pmix_info_t)); \ + if (NULL != (m)) { \ + _i = (pmix_info_t*)(m); \ + _i[(n)-1].flags = PMIX_INFO_ARRAY_END; \ + } \ } while (0) #define PMIX_INFO_CONSTRUCT(m) \ @@ -1511,7 +1531,7 @@ typedef struct pmix_info { for (_is=0; _is < (n); _is++) { \ PMIX_INFO_DESTRUCT(&((m)[_is])); \ } \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } \ } while (0) @@ -1570,13 +1590,13 @@ typedef struct pmix_pdata { /* utility macros for working with pmix_pdata_t structs */ #define PMIX_PDATA_CREATE(m, n) \ do { \ - (m) = (pmix_pdata_t*)calloc((n), sizeof(pmix_pdata_t)); \ + (m) = (pmix_pdata_t*)pmix_calloc((n), sizeof(pmix_pdata_t)); \ } while (0) #define PMIX_PDATA_RELEASE(m) \ do { \ PMIX_VALUE_DESTRUCT(&(m)->value); \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } while (0) @@ -1599,7 +1619,7 @@ typedef struct pmix_pdata { for (_ps=0; _ps < (n); _ps++) { \ PMIX_PDATA_DESTRUCT(&(_pdf[_ps])); \ } \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } \ } while (0) @@ -1640,7 +1660,7 @@ typedef struct pmix_app { /* utility macros for working with pmix_app_t structs */ #define PMIX_APP_CREATE(m, n) \ do { \ - (m) = (pmix_app_t*)calloc((n), sizeof(pmix_app_t)); \ + (m) = (pmix_app_t*)pmix_calloc((n), sizeof(pmix_app_t)); \ } while (0) #define PMIX_APP_INFO_CREATE(m, n) \ @@ -1652,7 +1672,7 @@ typedef struct pmix_app { #define PMIX_APP_RELEASE(m) \ do { \ PMIX_APP_DESTRUCT((m)); \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } while (0) @@ -1665,25 +1685,25 @@ typedef struct pmix_app { do { \ size_t _aii; \ if (NULL != (m)->cmd) { \ - free((m)->cmd); \ + pmix_free((m)->cmd); \ (m)->cmd = NULL; \ } \ if (NULL != (m)->argv) { \ for (_aii=0; NULL != (m)->argv[_aii]; _aii++) { \ - free((m)->argv[_aii]); \ 
+ pmix_free((m)->argv[_aii]); \ } \ - free((m)->argv); \ + pmix_free((m)->argv); \ (m)->argv = NULL; \ } \ if (NULL != (m)->env) { \ for (_aii=0; NULL != (m)->env[_aii]; _aii++) { \ - free((m)->env[_aii]); \ + pmix_free((m)->env[_aii]); \ } \ - free((m)->env); \ + pmix_free((m)->env); \ (m)->env = NULL; \ } \ if (NULL != (m)->cwd) { \ - free((m)->cwd); \ + pmix_free((m)->cwd); \ (m)->cwd = NULL; \ } \ if (NULL != (m)->info) { \ @@ -1700,7 +1720,7 @@ typedef struct pmix_app { for (_as=0; _as < (n); _as++) { \ PMIX_APP_DESTRUCT(&((m)[_as])); \ } \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } \ } while (0) @@ -1715,7 +1735,7 @@ typedef struct pmix_query { /* utility macros for working with pmix_query_t structs */ #define PMIX_QUERY_CREATE(m, n) \ do { \ - (m) = (pmix_query_t*)calloc((n) , sizeof(pmix_query_t)); \ + (m) = (pmix_query_t*)pmix_calloc((n) , sizeof(pmix_query_t)); \ } while (0) #define PMIX_QUERY_QUALIFIERS_CREATE(m, n) \ @@ -1727,7 +1747,7 @@ typedef struct pmix_query { #define PMIX_QUERY_RELEASE(m) \ do { \ PMIX_QUERY_DESTRUCT((m)); \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } while (0) @@ -1741,9 +1761,9 @@ typedef struct pmix_query { size_t _qi; \ if (NULL != (m)->keys) { \ for (_qi=0; NULL != (m)->keys[_qi]; _qi++) { \ - free((m)->keys[_qi]); \ + pmix_free((m)->keys[_qi]); \ } \ - free((m)->keys); \ + pmix_free((m)->keys); \ (m)->keys = NULL; \ } \ if (NULL != (m)->qualifiers) { \ @@ -1760,7 +1780,7 @@ typedef struct pmix_query { for (_qs=0; _qs < (n); _qs++) { \ PMIX_QUERY_DESTRUCT(&((m)[_qs])); \ } \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } \ } while (0) @@ -2431,7 +2451,7 @@ PMIX_EXPORT pmix_status_t PMIx_Data_pack(const pmix_proc_t *target, * status_code = PMIx_Data_unpack(buffer, (void*)&dest, &num_values, PMIX_INT32); * * num_values = 5; - * string_array = malloc(num_values*sizeof(char *)); + * string_array = pmix_malloc(num_values*sizeof(char *)); * status_code = PMIx_Data_unpack(buffer, (void*)(string_array), 
&num_values, PMIX_STRING); * * @endcode @@ -2500,20 +2520,20 @@ static inline void pmix_value_destruct(pmix_value_t * m) { if (PMIX_STRING == (m)->type) { if (NULL != (m)->data.string) { - free((m)->data.string); + pmix_free((m)->data.string); (m)->data.string = NULL; } } else if ((PMIX_BYTE_OBJECT == (m)->type) || (PMIX_COMPRESSED_STRING == (m)->type)) { if (NULL != (m)->data.bo.bytes) { - free((m)->data.bo.bytes); + pmix_free((m)->data.bo.bytes); (m)->data.bo.bytes = NULL; (m)->data.bo.size = 0; } } else if (PMIX_DATA_ARRAY == (m)->type) { if (NULL != (m)->data.darray) { pmix_darray_destruct((m)->data.darray); - free((m)->data.darray); + pmix_free((m)->data.darray); (m)->data.darray = NULL; } } else if (PMIX_ENVAR == (m)->type) { @@ -2557,12 +2577,12 @@ static inline void pmix_darray_destruct(pmix_data_array_t *m) char **_s = (char**)m->array; size_t _si; for (_si=0; _si < m->size; _si++) { - free(_s[_si]); + pmix_free(_s[_si]); } - free(m->array); + pmix_free(m->array); m->array = NULL; } else { - free(m->array); + pmix_free(m->array); } } } @@ -2598,38 +2618,40 @@ static inline void pmix_darray_destruct(pmix_data_array_t *m) PMIX_BYTE == (t) || \ PMIX_INT8 == (t) || \ PMIX_UINT8 == (t)) { \ - (m)->array = calloc((n), sizeof(int8_t)); \ + (m)->array = pmix_calloc((n), sizeof(int8_t)); \ + } else if (PMIX_POINTER == (t)) { \ + (m)->array = pmix_calloc((n), sizeof(void*)); \ } else if (PMIX_STRING == (t)) { \ - (m)->array = calloc((n), sizeof(char*)); \ + (m)->array = pmix_calloc((n), sizeof(char*)); \ } else if (PMIX_SIZE == (t)) { \ - (m)->array = calloc((n), sizeof(size_t)); \ + (m)->array = pmix_calloc((n), sizeof(size_t)); \ } else if (PMIX_PID == (t)) { \ - (m)->array = calloc((n), sizeof(pid_t)); \ + (m)->array = pmix_calloc((n), sizeof(pid_t)); \ } else if (PMIX_INT == (t) || \ PMIX_UINT == (t) || \ PMIX_STATUS == (t)) { \ - (m)->array = calloc((n), sizeof(int)); \ + (m)->array = pmix_calloc((n), sizeof(int)); \ } else if (PMIX_IOF_CHANNEL == (t) || \ 
PMIX_DATA_TYPE == (t) || \ PMIX_INT16 == (t) || \ PMIX_UINT16 == (t)) { \ - (m)->array = calloc((n), sizeof(int16_t)); \ + (m)->array = pmix_calloc((n), sizeof(int16_t)); \ } else if (PMIX_PROC_RANK == (t) || \ PMIX_INFO_DIRECTIVES == (t) || \ PMIX_INT32 == (t) || \ PMIX_UINT32 == (t)) { \ - (m)->array = calloc((n), sizeof(int32_t)); \ + (m)->array = pmix_calloc((n), sizeof(int32_t)); \ } else if (PMIX_INT64 == (t) || \ PMIX_UINT64 == (t)) { \ - (m)->array = calloc((n), sizeof(int64_t)); \ + (m)->array = pmix_calloc((n), sizeof(int64_t)); \ } else if (PMIX_FLOAT == (t)) { \ - (m)->array = calloc((n), sizeof(float)); \ + (m)->array = pmix_calloc((n), sizeof(float)); \ } else if (PMIX_DOUBLE == (t)) { \ - (m)->array = calloc((n), sizeof(double)); \ + (m)->array = pmix_calloc((n), sizeof(double)); \ } else if (PMIX_TIMEVAL == (t)) { \ - (m)->array = calloc((n), sizeof(struct timeval)); \ + (m)->array = pmix_calloc((n), sizeof(struct timeval)); \ } else if (PMIX_TIME == (t)) { \ - (m)->array = calloc((n), sizeof(time_t)); \ + (m)->array = pmix_calloc((n), sizeof(time_t)); \ } \ } else { \ (m)->array = NULL; \ @@ -2637,7 +2659,7 @@ static inline void pmix_darray_destruct(pmix_data_array_t *m) } while(0) #define PMIX_DATA_ARRAY_CREATE(m, n, t) \ do { \ - (m) = (pmix_data_array_t*)calloc(1, sizeof(pmix_data_array_t)); \ + (m) = (pmix_data_array_t*)pmix_calloc(1, sizeof(pmix_data_array_t)); \ PMIX_DATA_ARRAY_CONSTRUCT((m), (n), (t)); \ } while(0) @@ -2647,7 +2669,7 @@ static inline void pmix_darray_destruct(pmix_data_array_t *m) do { \ if (NULL != (m)) { \ PMIX_DATA_ARRAY_DESTRUCT(m); \ - free((m)); \ + pmix_free((m)); \ (m) = NULL; \ } \ } while(0) diff --git a/opal/mca/pmix/pmix3x/pmix/include/pmix_rename.h.in b/opal/mca/pmix/pmix3x/pmix/include/pmix_rename.h.in index e5a74b5c2e3..a06bbfdfde7 100644 --- a/opal/mca/pmix/pmix3x/pmix/include/pmix_rename.h.in +++ b/opal/mca/pmix/pmix3x/pmix/include/pmix_rename.h.in @@ -444,6 +444,7 @@ #define pmix_output_close 
@PMIX_RENAME@pmix_output_close #define pmix_output_finalize @PMIX_RENAME@pmix_output_finalize #define pmix_output_get_verbosity @PMIX_RENAME@pmix_output_get_verbosity +#define pmix_output_check_verbosity @PMIX_RENAME@pmix_output_check_verbosity #define pmix_output_hexdump @PMIX_RENAME@pmix_output_hexdump #define pmix_output_init @PMIX_RENAME@pmix_output_init #define pmix_output_open @PMIX_RENAME@pmix_output_open @@ -452,7 +453,6 @@ #define pmix_output_set_output_file_info @PMIX_RENAME@pmix_output_set_output_file_info #define pmix_output_set_verbosity @PMIX_RENAME@pmix_output_set_verbosity #define pmix_output_switch @PMIX_RENAME@pmix_output_switch -#define pmix_output_verbose @PMIX_RENAME@pmix_output_verbose #define pmix_output_vverbose @PMIX_RENAME@pmix_output_vverbose #define pmix_path_access @PMIX_RENAME@pmix_path_access #define pmix_path_df @PMIX_RENAME@pmix_path_df diff --git a/opal/mca/pmix/pmix3x/pmix/include/pmix_version.h.in b/opal/mca/pmix/pmix3x/pmix/include/pmix_version.h.in index 44987a65929..af4a00cd5df 100644 --- a/opal/mca/pmix/pmix3x/pmix/include/pmix_version.h.in +++ b/opal/mca/pmix/pmix3x/pmix/include/pmix_version.h.in @@ -3,6 +3,8 @@ * All rights reserved. * Copyright (c) 2018 IBM Corporation. All rights reserved. * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -19,5 +21,5 @@ #define PMIX_VERSION_MINOR @pmixminor@ #define PMIX_VERSION_RELEASE @pmixrelease@ -#define PMIX_NUMERIC_VERSION 0x00030100 +#define PMIX_NUMERIC_VERSION @pmixnumeric@ #endif diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic_impl.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic_impl.h index ee605ca8cf1..d03f83de283 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic_impl.h +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic_impl.h @@ -13,7 +13,7 @@ * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -496,7 +496,7 @@ static inline int pmix_atomic_trylock(pmix_atomic_lock_t *lock) { int32_t unlocked = PMIX_ATOMIC_LOCK_UNLOCKED; - bool ret = pmix_atomic_compare_exchange_strong_32 (&lock->u.lock, &unlocked, PMIX_ATOMIC_LOCK_LOCKED); + bool ret = pmix_atomic_compare_exchange_strong_acq_32 (&lock->u.lock, &unlocked, PMIX_ATOMIC_LOCK_LOCKED); return (ret == false) ? 1 : 0; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic_stdc.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic_stdc.h index 7c4a6089090..5fc5b0a1326 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic_stdc.h +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic_stdc.h @@ -2,7 +2,7 @@ /* * Copyright (c) 2018 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -72,7 +72,14 @@ static inline void pmix_atomic_wmb (void) static inline void pmix_atomic_rmb (void) { +#if PMIX_ASSEMBLY_ARCH == PMIX_X86_64 + /* work around a bug in older gcc versions (observed in gcc 6.x) + * where acquire seems to get treated as a no-op instead of being + * equivalent to __asm__ __volatile__("": : :"memory") on x86_64 */ + pmix_atomic_mb (); +#else atomic_thread_fence (memory_order_acquire); +#endif } #define pmix_atomic_compare_exchange_strong_32(addr, compare, value) atomic_compare_exchange_strong_explicit (addr, compare, value, memory_order_relaxed, memory_order_relaxed) diff --git a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/powerpc/atomic.h b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/powerpc/atomic.h index cfb46eb5194..17134e11c16 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/powerpc/atomic.h +++ b/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/powerpc/atomic.h @@ -13,7 +13,7 @@ * Copyright (c) 2010-2017 IBM Corporation. All rights reserved. * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -99,27 +99,7 @@ void pmix_atomic_isync(void) ISYNC(); } -#elif PMIX_XLC_INLINE_ASSEMBLY /* end PMIX_GCC_INLINE_ASSEMBLY */ - -/* Yeah, I don't know who thought this was a reasonable syntax for - * inline assembly. Do these because they are used so often and they - * are fairly simple (aka: there is a tech pub on IBM's web site - * containing the right hex for the instructions). 
- */ - -#undef PMIX_HAVE_INLINE_ATOMIC_MEM_BARRIER -#define PMIX_HAVE_INLINE_ATOMIC_MEM_BARRIER 0 - -#pragma mc_func pmix_atomic_mb { "7c0004ac" } /* sync */ -#pragma reg_killed_by pmix_atomic_mb /* none */ - -#pragma mc_func pmix_atomic_rmb { "7c2004ac" } /* lwsync */ -#pragma reg_killed_by pmix_atomic_rmb /* none */ - -#pragma mc_func pmix_atomic_wmb { "7c2004ac" } /* lwsync */ -#pragma reg_killed_by pmix_atomic_wmb /* none */ - -#endif +#endif /* end PMIX_GCC_INLINE_ASSEMBLY */ /********************************************************************** * @@ -297,7 +277,7 @@ static inline bool pmix_atomic_compare_exchange_strong_64 (pmix_atomic_int64_t * #define pmix_atomic_sc_64(addr, value, ret) \ do { \ pmix_atomic_int64_t *_addr = (addr); \ - int64_t _foo, _newval = (int64_t) value; \ + int64_t _newval = (int64_t) value; \ int32_t _ret; \ \ __asm__ __volatile__ (" stdcx. %2, 0, %1 \n\t" \ diff --git a/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_notification.c index c2585ea84ae..574607ec4b0 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_notification.c @@ -293,7 +293,7 @@ static pmix_status_t notify_server_of_event(pmix_status_t status, PMIX_RELEASE(cb); goto cleanup; } - } else { + } else if (NULL != cbfunc) { cbfunc(PMIX_SUCCESS, cbdata); } @@ -977,16 +977,6 @@ static void _notify_client_event(int sd, short args, void *cbdata) if (!pmix_notify_check_range(&rngtrk, &proc)) { continue; } - if (NULL != cd->targets) { - /* track the number of targets we have left to notify */ - --cd->nleft; - /* if the event was cached and this is the last one, - * then evict this event from the cache */ - if (0 == cd->nleft) { - pmix_hotel_checkout(&pmix_globals.notifications, cd->room); - PMIX_RELEASE(cd); - } - } pmix_output_verbose(2, pmix_server_globals.event_output, "pmix_server: notifying client %s:%u on status %s", 
pr->peer->info->pname.nspace, pr->peer->info->pname.rank, @@ -1044,6 +1034,17 @@ static void _notify_client_event(int sd, short args, void *cbdata) if (PMIX_SUCCESS != rc) { PMIX_RELEASE(bfr); } + if (NULL != cd->targets && 0 < cd->nleft) { + /* track the number of targets we have left to notify */ + --cd->nleft; + /* if the event was cached and this is the last one, + * then evict this event from the cache */ + if (0 == cd->nleft) { + pmix_hotel_checkout(&pmix_globals.notifications, cd->room); + holdcd = false; + break; + } + } } } } diff --git a/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_registration.c b/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_registration.c index 2607d6b101d..be2346048d8 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_registration.c +++ b/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_registration.c @@ -78,6 +78,8 @@ PMIX_CLASS_INSTANCE(pmix_rshift_caddy_t, static void check_cached_events(pmix_rshift_caddy_t *cd); +/* catch the event registration response message from the + * server and process it */ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_ptl_hdr_t *hdr, pmix_buffer_t *buf, void *cbdata) { @@ -100,7 +102,9 @@ static void regevents_cbfunc(struct pmix_peer_t *peer, pmix_ptl_hdr_t *hdr, } else { PMIX_ERROR_LOG(ret); } - /* remove the err handler and call the error handler reg completion callback fn.*/ + /* remove the err handler and call the error handler + * reg completion callback fn so the requestor + * doesn't hang */ if (NULL == rb->list) { if (NULL != rb->hdlr) { PMIX_RELEASE(rb->hdlr); @@ -834,7 +838,7 @@ static void reg_event_hdlr(int sd, short args, void *cbdata) cd->evregcbfn(rc, index, cd->cbdata); } - /* check if any matching notifications have been cached */ + /* check if any matching notifications have been locally cached */ check_cached_events(cd); if (NULL != cd->codes) { free(cd->codes); diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h 
b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h index 212b5b51014..202679cc4ee 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h @@ -110,6 +110,9 @@ typedef uint8_t pmix_cmd_t; /* provide a "pretty-print" function for cmds */ const char* pmix_command_string(pmix_cmd_t cmd); +/* provide a hook to init tool data */ +PMIX_EXPORT extern pmix_status_t pmix_tool_init_info(void); + /* define a set of flags to direct collection * of data during operations */ typedef enum { @@ -280,7 +283,7 @@ typedef struct { pmix_list_item_t super; pmix_event_t ev; bool event_active; - bool lost_connection; // tracker went thru lost connection procedure + bool host_called; // tracker has been passed up to host bool local; // operation is strictly local char *id; // string identifier for the collective pmix_cmd_t type; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/base/help-pmix-mca-base.txt b/opal/mca/pmix/pmix3x/pmix/src/mca/base/help-pmix-mca-base.txt index 7a96e7ace8f..3c8a67f1990 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/base/help-pmix-mca-base.txt +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/base/help-pmix-mca-base.txt @@ -10,8 +10,8 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2018 Intel, Inc. All rights reserved. +# Copyright (c) 2008-2019 Cisco Systems, Inc. All rights reserved +# Copyright (c) 2018-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -60,3 +60,12 @@ all components *except* a and b", while "c,d" specifies the inclusive behavior and means "use *only* components c and d." You cannot mix inclusive and exclusive behavior. +# +[failed to add component dir] +The pmix_mca_base_component_path MCA variable was used to add paths to +search for PMIX components. 
At least one directory failed to add +properly: + + %s + +Check to make sure that this directory exists, is readable, etc. diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_component_repository.c b/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_component_repository.c index eb7dda21b56..062b1cb75d6 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_component_repository.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_component_repository.c @@ -10,12 +10,12 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008-2019 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -43,6 +43,7 @@ #include "pmix_common.h" #include "src/class/pmix_hash_table.h" #include "src/util/basename.h" +#include "src/util/show_help.h" #if PMIX_HAVE_PDL_SUPPORT @@ -220,8 +221,13 @@ int pmix_mca_base_component_repository_add (const char *path) dir = pmix_mca_base_system_default_path; } - if (0 != pmix_pdl_foreachfile(dir, process_repository_item, NULL)) { - break; + if (0 != pmix_pdl_foreachfile(dir, process_repository_item, NULL) && + !(0 == strcmp(dir, pmix_mca_base_system_default_path) || 0 == strcmp(dir, pmix_mca_base_user_default_path))) { + // It is not an error if a directory fails to add (e.g., + // if it doesn't exist). 
But we should warn about it as + // it is something related to "show_load_errors" + pmix_show_help("help-pmix-mca-base.txt", + "failed to add component dir", true, dir); } } while (NULL != (dir = strtok_r (NULL, sep, &ctx))); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/Makefile.am index db788f3a7c8..5855a6aeae3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v12/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -51,6 +51,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_bfrops_v12_la_SOURCES = $(component_sources) mca_bfrops_v12_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_bfrops_v12_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_bfrops_v12_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/Makefile.am index ffe16123206..42eb14940aa 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -51,6 +51,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_bfrops_v20_la_SOURCES = $(component_sources) mca_bfrops_v20_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_bfrops_v20_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_bfrops_v20_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v21/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v21/Makefile.am index d4da3258b44..1658de9d1db 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v21/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v21/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -44,6 +44,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_bfrops_v21_la_SOURCES = $(component_sources) mca_bfrops_v21_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_bfrops_v21_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_bfrops_v21_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v3/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v3/Makefile.am index 14438eac7f1..d14a13258ff 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v3/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v3/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -44,6 +44,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_bfrops_v3_la_SOURCES = $(component_sources) mca_bfrops_v3_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_bfrops_v3_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_bfrops_v3_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/base/gds_base_fns.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/base/gds_base_fns.c index b9f8533c0ef..abec7a744da 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/base/gds_base_fns.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/base/gds_base_fns.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2018 IBM Corporation. All rights reserved. @@ -79,7 +79,8 @@ pmix_status_t pmix_gds_base_setup_fork(const pmix_proc_t *proc, if (NULL == active->module->setup_fork) { continue; } - if (PMIX_SUCCESS != (rc = active->module->setup_fork(proc, env))) { + rc = active->module->setup_fork(proc, env); + if (PMIX_SUCCESS != rc && PMIX_ERR_NOT_AVAILABLE != rc) { return rc; } } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/Makefile.am index dc799c892f8..eae7ef34abc 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
# Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2017 Mellanox Technologies, Inc. @@ -64,6 +64,9 @@ mcacomponent_LTLIBRARIES = $(component) mca_gds_ds12_la_SOURCES = $(component_sources) mca_gds_ds12_la_LDFLAGS = -module -avoid-version \ $(PMIX_TOP_BUILDDIR)/src/mca/common/dstore/libmca_common_dstore.la +if NEED_LIBPMIX +mca_gds_ds12_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_gds_ds12_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/Makefile.am index 215275754d7..06e1dd13a90 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2017-2018 Mellanox Technologies, Inc. 
@@ -56,6 +56,9 @@ mcacomponent_LTLIBRARIES = $(component) mca_gds_ds21_la_SOURCES = $(component_sources) mca_gds_ds21_la_LDFLAGS = -module -avoid-version \ $(PMIX_TOP_BUILDDIR)/src/mca/common/dstore/libmca_common_dstore.la +if NEED_LIBPMIX +mca_gds_ds21_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_gds_ds21_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_lock_pthread.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_lock_pthread.c index 5e8b7be92b0..99713f5651e 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_lock_pthread.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds21/gds_ds21_lock_pthread.c @@ -2,7 +2,7 @@ * Copyright (c) 2018 Mellanox Technologies, Inc. * All rights reserved. * - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -234,6 +234,7 @@ pmix_status_t pmix_gds_ds21_lock_init(pmix_common_dstor_lock_ctx_t *ctx, const c rc = PMIX_ERR_NOT_FOUND; goto error; } + seg_hdr = (segment_hdr_t*)lock_item->seg_desc->seg_info.seg_base_addr; } lock_item->num_locks = seg_hdr->num_locks; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/Makefile.am index 7d9da0189e2..4067145ff28 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ @@ -49,6 +49,9 @@ mcacomponent_LTLIBRARIES = $(component) mca_gds_hash_la_SOURCES = $(component_sources) mca_gds_hash_la_LIBADD = $(gds_hash_LIBS) mca_gds_hash_la_LDFLAGS = -module -avoid-version $(gds_hash_LDFLAGS) +if NEED_LIBPMIX +mca_gds_hash_la_LIBADD += $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_gds_hash_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c index 4b02d8faf21..4e092fc5a68 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016-2018 IBM Corporation. All rights reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -201,8 +201,16 @@ static pmix_status_t hash_assign_module(pmix_info_t *info, size_t ninfo, return PMIX_SUCCESS; } +/* Define a bitmask to track what information may not have + * been provided but is computable from other info */ +#define PMIX_HASH_PROC_DATA 0x00000001 +#define PMIX_HASH_JOB_SIZE 0x00000002 +#define PMIX_HASH_MAX_PROCS 0x00000004 +#define PMIX_HASH_NUM_NODES 0x00000008 + static pmix_status_t store_map(pmix_hash_table_t *ht, - char **nodes, char **ppn) + char **nodes, char **ppn, + uint32_t flags) { pmix_status_t rc; pmix_value_t *val; @@ -212,6 +220,8 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, bool updated; pmix_kval_t *kp2; char **procs; + uint32_t totalprocs=0; + bool localldr; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%d] gds:hash:store_map", @@ -223,6 +233,22 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, return PMIX_ERR_BAD_PARAM; } + /* if they didn't provide the number of nodes, then + * compute it from the list of nodes */ 
+ if (!(PMIX_HASH_NUM_NODES & flags)) { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_NUM_NODES); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_UINT32; + kp2->value->data.uint32 = pmix_argv_count(nodes); + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + PMIX_RELEASE(kp2); // maintain acctg + } + for (n=0; NULL != nodes[n]; n++) { /* check and see if we already have data for this node */ val = NULL; @@ -240,18 +266,22 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, } iptr = (pmix_info_t*)val->data.darray->array; updated = false; + localldr = false; for (m=0; m < val->data.darray->size; m++) { - if (0 == strncmp(iptr[m].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN)) { + if (PMIX_CHECK_KEY(&iptr[m], PMIX_LOCAL_PEERS)) { /* we will update this entry */ if (NULL != iptr[m].value.data.string) { free(iptr[m].value.data.string); } iptr[m].value.data.string = strdup(ppn[n]); - updated = true; - break; + updated = true; // no need to add the local_peers to the array + } else if (PMIX_CHECK_KEY(&iptr[m], PMIX_LOCALLDR)) { + rank = strtoul(ppn[n], NULL, 10); + iptr[m].value.data.rank = rank; + localldr = true; // no need to add localldr to the array } } - if (!updated) { + if (!updated || !localldr) { /* append this entry to the current data */ kp2 = PMIX_NEW(pmix_kval_t); if (NULL == kp2) { @@ -270,7 +300,18 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, return PMIX_ERR_NOMEM; } kp2->value->data.darray->type = PMIX_INFO; - kp2->value->data.darray->size = val->data.darray->size + 1; + /* if we didn't update the local leader, then we will + * add it here */ + m = 0; + if (!localldr) { + kp2->value->data.darray->size = val->data.darray->size + 1; + ++m; + } + /* if they didn't update the local peers, then we add it here */ + if (!updated) { + kp2->value->data.darray->size = val->data.darray->size + 1; + ++m; + } 
PMIX_INFO_CREATE(info, kp2->value->data.darray->size); if (NULL == info) { PMIX_RELEASE(kp2); @@ -280,7 +321,15 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, for (m=0; m < val->data.darray->size; m++) { PMIX_INFO_XFER(&info[m], &iptr[m]); } - PMIX_INFO_LOAD(&info[kp2->value->data.darray->size-1], PMIX_LOCAL_PEERS, ppn[n], PMIX_STRING); + if (!updated) { + PMIX_INFO_LOAD(&info[kp2->value->data.darray->size-m], PMIX_LOCAL_PEERS, ppn[n], PMIX_STRING); + --m; + } + if (!localldr) { + rank = strtoul(ppn[n], NULL, 10); + PMIX_INFO_LOAD(&info[kp2->value->data.darray->size-m], PMIX_LOCALLDR, &rank, PMIX_PROC_RANK); + --m; + } kp2->value->data.darray->array = info; if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { PMIX_ERROR_LOG(rc); @@ -308,14 +357,16 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, return PMIX_ERR_NOMEM; } kp2->value->data.darray->type = PMIX_INFO; - PMIX_INFO_CREATE(info, 1); + PMIX_INFO_CREATE(info, 2); if (NULL == info) { PMIX_RELEASE(kp2); return PMIX_ERR_NOMEM; } PMIX_INFO_LOAD(&info[0], PMIX_LOCAL_PEERS, ppn[n], PMIX_STRING); + rank = strtoul(ppn[n], NULL, 10); + PMIX_INFO_LOAD(&info[1], PMIX_LOCALLDR, &rank, PMIX_PROC_RANK); kp2->value->data.darray->array = info; - kp2->value->data.darray->size = 1; + kp2->value->data.darray->size = 2; if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(kp2); @@ -326,6 +377,7 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, /* split the list of procs so we can store their * individual location data */ procs = pmix_argv_split(ppn[n], ','); + totalprocs += pmix_argv_count(procs); for (m=0; NULL != procs[m]; m++) { /* store the hostname for each proc */ kp2 = PMIX_NEW(pmix_kval_t); @@ -341,6 +393,48 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, return rc; } PMIX_RELEASE(kp2); // maintain acctg + if (!(PMIX_HASH_PROC_DATA & flags)) { + /* add an entry for the nodeid */ + kp2 = PMIX_NEW(pmix_kval_t); + 
kp2->key = strdup(PMIX_NODEID); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_UINT32; + kp2->value->data.uint32 = n; + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, rank, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + pmix_argv_free(procs); + return rc; + } + PMIX_RELEASE(kp2); // maintain acctg + /* add an entry for the local rank */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_LOCAL_RANK); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_UINT16; + kp2->value->data.uint16 = m; + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, rank, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + pmix_argv_free(procs); + return rc; + } + PMIX_RELEASE(kp2); // maintain acctg + /* add an entry for the node rank - for now, we assume + * only the one job is running */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_NODE_RANK); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_UINT16; + kp2->value->data.uint16 = m; + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, rank, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + pmix_argv_free(procs); + return rc; + } + PMIX_RELEASE(kp2); // maintain acctg + } } pmix_argv_free(procs); } @@ -360,6 +454,41 @@ static pmix_status_t store_map(pmix_hash_table_t *ht, } PMIX_RELEASE(kp2); // maintain acctg + /* if they didn't provide the job size, compute it as + * being the number of provided procs (i.e., size of + * ppn list) */ + if (!(PMIX_HASH_JOB_SIZE & flags)) { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_JOB_SIZE); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_UINT32; + kp2->value->data.uint32 = totalprocs; + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + PMIX_RELEASE(kp2); // maintain acctg + } + + /* if they didn't provide a value for max procs, just + * assume 
it is the same as the number of procs in the + * job and store it */ + if (!(PMIX_HASH_MAX_PROCS & flags)) { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_MAX_PROCS); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_UINT32; + kp2->value->data.uint32 = totalprocs; + if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + PMIX_RELEASE(kp2); // maintain acctg + } + + return PMIX_SUCCESS; } @@ -376,6 +505,7 @@ pmix_status_t hash_cache_job_info(struct pmix_namespace_t *ns, pmix_rank_t rank; pmix_status_t rc=PMIX_SUCCESS; size_t n, j, size, len; + uint32_t flags = 0; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%d] gds:hash:cache_job_info for nspace %s", @@ -431,29 +561,14 @@ pmix_status_t hash_cache_job_info(struct pmix_namespace_t *ns, PMIX_ERROR_LOG(rc); goto release; } - /* if we have already found the proc map, then parse - * and store the detailed map */ - if (NULL != procs) { - if (PMIX_SUCCESS != (rc = store_map(ht, nodes, procs))) { - PMIX_ERROR_LOG(rc); - goto release; - } - } } else if (0 == strcmp(info[n].key, PMIX_PROC_MAP)) { /* parse the regex to get the argv array containing proc ranks on each node */ if (PMIX_SUCCESS != (rc = pmix_preg.parse_procs(info[n].value.data.string, &procs))) { PMIX_ERROR_LOG(rc); goto release; } - /* if we have already recv'd the node map, then parse - * and store the detailed map */ - if (NULL != nodes) { - if (PMIX_SUCCESS != (rc = store_map(ht, nodes, procs))) { - PMIX_ERROR_LOG(rc); - goto release; - } - } } else if (0 == strcmp(info[n].key, PMIX_PROC_DATA)) { + flags |= PMIX_HASH_PROC_DATA; /* an array of data pertaining to a specific proc */ if (PMIX_DATA_ARRAY != info[n].value.type) { PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); @@ -543,9 +658,15 @@ pmix_status_t hash_cache_job_info(struct pmix_namespace_t *ns, goto release; } PMIX_RELEASE(kp2); // maintain acctg - /* if this is 
the job size, then store it */ - if (0 == strncmp(info[n].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN)) { + /* if this is the job size, then store it in + * the nptr tracker and flag that we were given it */ + if (PMIX_CHECK_KEY(&info[n], PMIX_JOB_SIZE)) { nptr->nprocs = info[n].value.data.uint32; + flags |= PMIX_HASH_JOB_SIZE; + } else if (PMIX_CHECK_KEY(&info[n], PMIX_NUM_NODES)) { + flags |= PMIX_HASH_NUM_NODES; + } else if (PMIX_CHECK_KEY(&info[n], PMIX_MAX_PROCS)) { + flags |= PMIX_HASH_MAX_PROCS; } } } @@ -577,6 +698,17 @@ pmix_status_t hash_cache_job_info(struct pmix_namespace_t *ns, trk->gdata_added = true; } + /* we must have the proc AND node maps */ + if (NULL == procs || NULL == nodes) { + rc = PMIX_ERR_NOT_FOUND; + goto release; + } + + if (PMIX_SUCCESS != (rc = store_map(ht, nodes, procs, flags))) { + PMIX_ERROR_LOG(rc); + goto release; + } + release: if (NULL != nodes) { pmix_argv_free(nodes); @@ -644,25 +776,24 @@ static pmix_status_t register_info(pmix_peer_t *peer, for (rank=0; rank < ns->nprocs; rank++) { val = NULL; rc = pmix_hash_fetch(ht, rank, NULL, &val); - if (PMIX_SUCCESS != rc) { + if (PMIX_SUCCESS != rc && PMIX_ERR_PROC_ENTRY_NOT_FOUND != rc) { PMIX_ERROR_LOG(rc); if (NULL != val) { PMIX_VALUE_RELEASE(val); } return rc; } - if (NULL == val) { - return PMIX_ERR_NOT_FOUND; - } PMIX_CONSTRUCT(&buf, pmix_buffer_t); PMIX_BFROPS_PACK(rc, peer, &buf, &rank, 1, PMIX_PROC_RANK); - info = (pmix_info_t*)val->data.darray->array; - ninfo = val->data.darray->size; - for (n=0; n < ninfo; n++) { - kv.key = info[n].key; - kv.value = &info[n].value; - PMIX_BFROPS_PACK(rc, peer, &buf, &kv, 1, PMIX_KVAL); + if (NULL != val) { + info = (pmix_info_t*)val->data.darray->array; + ninfo = val->data.darray->size; + for (n=0; n < ninfo; n++) { + kv.key = info[n].key; + kv.value = &info[n].value; + PMIX_BFROPS_PACK(rc, peer, &buf, &kv, 1, PMIX_KVAL); + } } kv.key = PMIX_PROC_BLOB; kv.value = &blob; @@ -1327,7 +1458,6 @@ static pmix_status_t hash_fetch(const pmix_proc_t 
*proc, val = NULL; rc = pmix_hash_fetch(ht, PMIX_RANK_WILDCARD, NULL, &val); if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); if (NULL != val) { PMIX_VALUE_RELEASE(val); } @@ -1341,7 +1471,6 @@ static pmix_status_t hash_fetch(const pmix_proc_t *proc, if (PMIX_DATA_ARRAY != val->type || NULL == val->data.darray || PMIX_INFO != val->data.darray->type) { - PMIX_ERROR_LOG(PMIX_ERR_INVALID_VAL); PMIX_VALUE_RELEASE(val); return PMIX_ERR_INVALID_VAL; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pdl/configure.m4 b/opal/mca/pmix/pmix3x/pmix/src/mca/pdl/configure.m4 index c5082065b23..1e749df5b2d 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pdl/configure.m4 +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pdl/configure.m4 @@ -1,8 +1,8 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. -dnl Copyright (c) 2016-2017 Intel, Inc. All rights reserved. -dnl Copyright (c) 2016 Research Organization for Information Science +dnl Copyright (c) 2016-2019 Intel, Inc. All rights reserved. +dnl Copyright (c) 2016-2019 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ dnl @@ -27,7 +27,7 @@ AC_DEFUN([MCA_pmix_pdl_CONFIG],[ # (we still need to configure them all so that things like "make # dist" work", but we just want the MCA system to (artificially) # conclude that it can't build any of the components. - AS_IF([test "$enable_dlopen" = "no"], + AS_IF([test $PMIX_ENABLE_DLOPEN_SUPPORT -eq 0], [want_pdl=0], [want_pdl=1]) MCA_CONFIGURE_FRAMEWORK([pdl], [$want_pdl]) @@ -35,7 +35,7 @@ AC_DEFUN([MCA_pmix_pdl_CONFIG],[ # If we found no suitable static pdl component and dlopen support # was not specifically disabled, this is an error. 
AS_IF([test "$MCA_pmix_pdl_STATIC_COMPONENTS" = "" && \ - test "$enable_dlopen" != "no"], + test $PMIX_ENABLE_DLOPEN_SUPPORT -eq 1], [AC_MSG_WARN([Did not find a suitable static pmix pdl component]) AC_MSG_WARN([You might need to install libltld (and its headers) or]) AC_MSG_WARN([specify --disable-dlopen to configure.]) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/default/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/default/Makefile.am index aa141f9d8ff..369a06269f3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/default/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/default/Makefile.am @@ -10,7 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. # $COPYRIGHT$ # @@ -40,6 +40,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_plog_default_la_SOURCES = $(sources) mca_plog_default_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_plog_default_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(component_noinst) libmca_plog_default_la_SOURCES =$(sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/stdfd/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/stdfd/Makefile.am index 497dfaaf1a7..0cdd43d60cb 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/stdfd/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/stdfd/Makefile.am @@ -10,7 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ # @@ -40,6 +40,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_plog_stdfd_la_SOURCES = $(sources) mca_plog_stdfd_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_plog_stdfd_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(component_noinst) libmca_plog_stdfd_la_SOURCES =$(sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/syslog/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/syslog/Makefile.am index 7a09d28fac1..ba79c07fe73 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/syslog/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/syslog/Makefile.am @@ -10,7 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. 
# $COPYRIGHT$ # @@ -40,6 +40,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_plog_syslog_la_SOURCES = $(sources) mca_plog_syslog_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_plog_syslog_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(component_noinst) libmca_plog_syslog_la_SOURCES =$(sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_fns.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_fns.c index c4869da529d..d62268dbd52 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_fns.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_fns.c @@ -209,7 +209,8 @@ pmix_status_t pmix_pnet_base_setup_fork(const pmix_proc_t *proc, char ***env) PMIX_LIST_FOREACH(active, &pmix_pnet_globals.actives, pmix_pnet_base_active_module_t) { if (NULL != active->module->setup_fork) { - if (PMIX_SUCCESS != (rc = active->module->setup_fork(nptr, proc, env))) { + rc = active->module->setup_fork(nptr, proc, env); + if (PMIX_SUCCESS != rc && PMIX_ERR_NOT_AVAILABLE != rc) { return rc; } } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/Makefile.am index 1223b43eca4..fe01cde836e 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ @@ -49,6 +49,9 @@ mcacomponent_LTLIBRARIES = $(component) mca_pnet_opa_la_SOURCES = $(component_sources) mca_pnet_opa_la_LIBADD = $(pnet_opa_LIBS) mca_pnet_opa_la_LDFLAGS = -module -avoid-version $(pnet_opa_LDFLAGS) +if NEED_LIBPMIX +mca_pnet_opa_la_LIBADD += $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_pnet_opa_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/Makefile.am index 946d81c8fba..048f34b0b63 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ @@ -49,6 +49,9 @@ mcacomponent_LTLIBRARIES = $(component) mca_pnet_tcp_la_SOURCES = $(component_sources) mca_pnet_tcp_la_LIBADD = $(pnet_tcp_LIBS) mca_pnet_tcp_la_LDFLAGS = -module -avoid-version $(pnet_tcp_LDFLAGS) +if NEED_LIBPMIX +mca_pnet_tcp_la_LIBADD += $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_pnet_tcp_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/Makefile.am index 3faf68a32c2..b71000ef555 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2018 Intel, Inc. 
All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # $COPYRIGHT$ @@ -46,6 +46,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_pnet_test_la_SOURCES = $(component_sources) mca_pnet_test_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_pnet_test_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_pnet_test_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/Makefile.am index fa51393622f..607dcdb0c96 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -44,6 +44,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_preg_native_la_SOURCES = $(component_sources) mca_preg_native_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_preg_native_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_preg_native_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/preg_native.c b/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/preg_native.c index 0d31f96435a..0c9d6188a0d 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/preg_native.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/preg/native/preg_native.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. 
All rights reserved. * Copyright (c) 2016-2019 IBM Corporation. All rights reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -893,7 +893,7 @@ static pmix_status_t regex_parse_value_range(char *base, char *range, for (found = false, i = 0; i < len; ++i) { if (isdigit((int) range[i])) { if (!found) { - start = atoi(range + i); + start = strtol(range + i, NULL, 10); found = true; break; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/Makefile.am new file mode 100644 index 00000000000..1dd3853eb2d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/Makefile.am @@ -0,0 +1,59 @@ +# -*- makefile -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. +# Copyright (c) 2019 Mellanox Technologies, Inc. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +if MCA_BUILD_PSEC_DUMMY_HANDSHAKE + +headers = psec_dummy_handshake.h +sources = \ + psec_dummy_handshake_component.c \ + psec_dummy_handshake.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if MCA_BUILD_pmix_psec_dummy_handshake_DSO +lib = +lib_sources = +component = mca_psec_dummy_handshake.la +component_sources = $(headers) $(sources) +else +lib = libmca_psec_dummy_handshake.la +lib_sources = $(headers) $(sources) +component = +component_sources = +endif + +mcacomponentdir = $(pmixlibdir) +mcacomponent_LTLIBRARIES = $(component) +mca_psec_dummy_handshake_la_SOURCES = $(component_sources) +mca_psec_dummy_handshake_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_psec_dummy_handshake_la_LIBADD = $(top_builddir)/src/libpmix.la +endif + +noinst_LTLIBRARIES = $(lib) +libmca_psec_dummy_handshake_la_SOURCES = $(lib_sources) +libmca_psec_dummy_handshake_la_LDFLAGS = -module -avoid-version + +endif diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake.c b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake.c new file mode 100644 index 00000000000..ae1f9b62e59 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake.c @@ -0,0 +1,170 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2019 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + +#include +#ifdef HAVE_SYS_TYPES_H +#include +#endif + +#include + +#include "src/include/pmix_globals.h" +#include "src/util/error.h" +#include "src/util/output.h" + +#include "src/mca/psec/base/base.h" +#include "psec_dummy_handshake.h" + +#include "src/mca/ptl/base/base.h" + +#define PMIX_PSEC_DUMMY_HNDSHK_STR "PMIX_PSEC_DUMMY_HANDSHAKE_STRING" + +static pmix_status_t simple_init(void); +static void simple_finalize(void); +static pmix_status_t create_cred(struct pmix_peer_t *peer, + const pmix_info_t directives[], size_t ndirs, + pmix_info_t **info, size_t *ninfo, + pmix_byte_object_t *cred); +static pmix_status_t client_hndshk(int sd); +static pmix_status_t server_hndshk(int sd); + +pmix_psec_module_t pmix_dummy_handshake_module = { + .name = "dummy_handshake", + /** init/finalize */ + .init = simple_init, + .finalize = simple_finalize, + /** Client-side */ + .create_cred = create_cred, + .client_handshake = client_hndshk, + /** Server-side */ + .validate_cred = NULL, + .server_handshake = server_hndshk +}; + +static pmix_status_t simple_init(void) +{ + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, + "psec: simple init"); + return PMIX_SUCCESS; +} + +static void simple_finalize(void) +{ + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, + "psec: simple finalize"); +} + +static pmix_status_t create_cred(struct pmix_peer_t *peer, + const pmix_info_t directives[], size_t ndirs, + pmix_info_t **info, size_t *ninfo, + pmix_byte_object_t *cred) +{ + char mycred[] = "dymmy_cred"; + + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, + "psec: simple create_cred"); + + /* ensure initialization */ + PMIX_BYTE_OBJECT_CONSTRUCT(cred); + + cred->bytes = strdup(mycred); + cred->size = strlen(mycred) + 1; + + return PMIX_SUCCESS; +} + +static pmix_status_t server_hndshk(int sd) +{ + pmix_status_t rc, status = PMIX_SUCCESS; + char 
*hndshk_msg = NULL; + size_t size; + + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, + "psec: simple server_hndshk"); + + asprintf(&hndshk_msg, "%s", PMIX_PSEC_DUMMY_HNDSHK_STR); + size = strlen(hndshk_msg); + + /* send size of handshake message */ + if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(sd, (char*)&size, + sizeof(size)))) { + goto exit; + } + /* send handshake message */ + if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(sd, hndshk_msg, + size))) { + goto exit; + } + /* recv hadshake status from client */ + if (PMIX_SUCCESS != (rc = pmix_ptl_base_recv_blocking(sd, (char*)&status, + sizeof(status)))) { + goto exit; + } + rc = status; + pmix_output(0, "[%s:%d] psec handshake status %d recv from client", + __FILE__, __LINE__, status); + +exit: + if (NULL != hndshk_msg) { + free(hndshk_msg); + } + + return rc; +} + +static pmix_status_t client_hndshk(int sd) +{ + char *hndshk_msg = NULL; + size_t size; + pmix_status_t rc, status = PMIX_SUCCESS; + + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, + "psec: simple client_hndshk"); + + /* recv size of handshake message */ + if (PMIX_SUCCESS != (rc = pmix_ptl_base_recv_blocking(sd, (char*)&size, + sizeof(size_t)))) { + return rc; + } + hndshk_msg = (char*)malloc(size); + /* recv handshake message */ + if (PMIX_SUCCESS != (rc = pmix_ptl_base_recv_blocking(sd, (char*)hndshk_msg, + size))) { + free(hndshk_msg); + return rc; + } + /* verifying handshake data */ + if (size != strlen(PMIX_PSEC_DUMMY_HNDSHK_STR)) { + rc = PMIX_ERR_HANDSHAKE_FAILED; + goto exit; + } + if (0 != strncmp(hndshk_msg, PMIX_PSEC_DUMMY_HNDSHK_STR, size)) { + rc = PMIX_ERR_HANDSHAKE_FAILED; + goto exit; + } + + /* send hadshake status to the server */ + status = PMIX_SUCCESS; + if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(sd, (char*)&status, + sizeof(status)))) { + goto exit; + } + pmix_output(0, "[%s:%d] psec handshake status %d sent to server", + __FILE__, __LINE__, status); +exit: + if 
(NULL != hndshk_msg) { + free(hndshk_msg); + } + return rc; +} diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake.h b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake.h new file mode 100644 index 00000000000..74cc3632213 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake.h @@ -0,0 +1,29 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2019 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PMIX_SIMPLE_H +#define PMIX_SIMPLE_H + +#include + + +#include "src/mca/psec/psec.h" + +BEGIN_C_DECLS + +/* the component must be visible data for the linker to find it */ +PMIX_EXPORT extern pmix_psec_base_component_t mca_psec_dummy_handshake_component; +extern pmix_psec_module_t pmix_dummy_handshake_module; + +END_C_DECLS + +#endif diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake_component.c new file mode 100644 index 00000000000..53fb13b6fed --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/dummy_handshake/psec_dummy_handshake_component.c @@ -0,0 +1,73 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2019 Mellanox Technologies, Inc. + * All rights reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include "pmix_common.h" + +#include "src/mca/base/pmix_mca_base_var.h" +#include "src/mca/psec/psec.h" +#include "psec_dummy_handshake.h" + +static pmix_status_t component_open(void); +static pmix_status_t component_close(void); +static pmix_status_t component_query(pmix_mca_base_module_t **module, int *priority); +static pmix_psec_module_t* assign_module(void); + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +pmix_psec_base_component_t mca_psec_dummy_handshake_component = { + .base = { + PMIX_PSEC_BASE_VERSION_1_0_0, + + /* Component name and version */ + .pmix_mca_component_name = "dummy_handshake", + PMIX_MCA_BASE_MAKE_VERSION(component, + PMIX_MAJOR_VERSION, + PMIX_MINOR_VERSION, + PMIX_RELEASE_VERSION), + + /* Component open and close functions */ + .pmix_mca_open_component = component_open, + .pmix_mca_close_component = component_close, + .pmix_mca_query_component = component_query, + }, + .data = { + /* The component is checkpoint ready */ + PMIX_MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + .assign_module = assign_module +}; + +static int component_open(void) +{ + return PMIX_SUCCESS; +} + +static int component_query(pmix_mca_base_module_t **module, int *priority) +{ + *priority = 100; + *module = (pmix_mca_base_module_t *)&pmix_dummy_handshake_module; + return PMIX_SUCCESS; +} + + +static int component_close(void) +{ + return PMIX_SUCCESS; +} + +static pmix_psec_module_t* assign_module(void) +{ + return &pmix_dummy_handshake_module; +} diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/munge/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/munge/Makefile.am index 5f01461190c..79756320d6a 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/munge/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/munge/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the 
University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -47,6 +47,9 @@ mcacomponent_LTLIBRARIES = $(component) mca_psec_munge_la_SOURCES = $(component_sources) mca_psec_munge_la_LDFLAGS = -module -avoid-version $(psec_munge_LDFLAGS) mca_psec_munge_la_LIBADD = $(psec_munge_LIBS) +if NEED_LIBPMIX +mca_psec_munge_la_LIBADD += $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_psec_munge_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/Makefile.am index 9381d8ad60f..b1086a2aac2 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -44,6 +44,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_psec_native_la_SOURCES = $(component_sources) mca_psec_native_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_psec_native_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_psec_native_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/psec_native.c b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/psec_native.c index 1af787399a5..60af0f7af1a 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/psec_native.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/native/psec_native.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -25,7 +25,7 @@ #include "src/util/error.h" #include "src/util/output.h" -#include "src/mca/psec/psec.h" +#include "src/mca/psec/base/base.h" #include "psec_native.h" static pmix_status_t native_init(void); @@ -49,14 +49,14 @@ pmix_psec_module_t pmix_native_module = { static pmix_status_t native_init(void) { - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec: native init"); return PMIX_SUCCESS; } static void native_finalize(void) { - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec: native finalize"); } @@ -167,7 +167,7 @@ static pmix_status_t validate_cred(struct pmix_peer_t *peer, size_t n, m; uint32_t u32; - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec: native validate_cred %s", (NULL == cred) ? 
"NULL" : "NON-NULL"); @@ -175,10 +175,10 @@ static pmix_status_t validate_cred(struct pmix_peer_t *peer, /* usock protocol - get the remote side's uid/gid */ #if defined(SO_PEERCRED) && (defined(HAVE_STRUCT_UCRED_UID) || defined(HAVE_STRUCT_UCRED_CR_UID)) /* Ignore received 'cred' and validate ucred for socket instead. */ - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec:native checking getsockopt on socket %d for peer credentials", pr->sd); if (getsockopt(pr->sd, SOL_SOCKET, SO_PEERCRED, &ucred, &crlen) < 0) { - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec: getsockopt SO_PEERCRED failed: %s", strerror (pmix_socket_errno)); return PMIX_ERR_INVALID_CRED; @@ -192,10 +192,10 @@ static pmix_status_t validate_cred(struct pmix_peer_t *peer, #endif #elif defined(HAVE_GETPEEREID) - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec:native checking getpeereid on socket %d for peer credentials", pr->sd); if (0 != getpeereid(pr->sd, &euid, &egid)) { - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec: getsockopt getpeereid failed: %s", strerror (pmix_socket_errno)); return PMIX_ERR_INVALID_CRED; @@ -255,14 +255,14 @@ static pmix_status_t validate_cred(struct pmix_peer_t *peer, /* check uid */ if (euid != pr->info->uid) { - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec: socket cred contains invalid uid %u", euid); return PMIX_ERR_INVALID_CRED; } /* check gid */ if (egid != pr->info->gid) { - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_psec_base_framework.framework_output, "psec: socket cred contains invalid gid %u", egid); return PMIX_ERR_INVALID_CRED; } diff --git 
a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/none/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/none/Makefile.am index 74236996375..cde03ba502f 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/none/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/none/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -44,6 +44,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_psec_none_la_SOURCES = $(component_sources) mca_psec_none_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_psec_none_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_psec_none_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/psec.h b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/psec.h index 4057681f6f6..10c31e9bfa3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psec/psec.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psec/psec.h @@ -1,10 +1,11 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -155,23 +156,12 @@ PMIX_EXPORT pmix_psec_module_t* pmix_psec_base_assign_module(const char *options pmix_output_verbose(2, pmix_globals.debug_output, \ "credential validated"); \ } \ - /* send them the result */ \ - if (PMIX_SUCCESS != (_r = pmix_ptl_base_send_blocking((p)->sd, (char*)&(_r), sizeof(int)))) { \ - PMIX_ERROR_LOG(_r); \ - } \ (r) = _r; \ } else if (NULL != (p)->nptr->compat.psec->server_handshake) { \ - /* execute the handshake if the security mode calls for it */ \ + /* request the handshake if the security mode calls for it */ \ pmix_output_verbose(2, pmix_globals.debug_output, \ - "executing handshake"); \ + "requesting handshake"); \ _r = PMIX_ERR_READY_FOR_HANDSHAKE; \ - if (PMIX_SUCCESS != (_r = pmix_ptl_base_send_blocking((p)->sd, (char*)&(_r), sizeof(int)))) { \ - PMIX_ERROR_LOG(_r); \ - } else { \ - if (PMIX_SUCCESS != (_r = p->nptr->compat.psec->server_handshake((p)->sd))) { \ - PMIX_ERROR_LOG(_r); \ - } \ - } \ (r) = _r; \ } else { \ /* this is not allowed */ \ @@ -179,6 +169,21 @@ PMIX_EXPORT pmix_psec_module_t* pmix_psec_base_assign_module(const char *options } \ } while(0) + +#define PMIX_PSEC_SERVER_HANDSHAKE_IFNEED(r, p, d, nd, in, nin, c) \ + if(PMIX_ERR_READY_FOR_HANDSHAKE == r) { \ + int _r; \ + /* execute the handshake if the security mode calls for it */ \ + pmix_output_verbose(2, pmix_globals.debug_output, \ + "executing handshake"); \ + if (PMIX_SUCCESS != (_r = p->nptr->compat.psec->server_handshake((p)->sd))) { \ + PMIX_ERROR_LOG(_r); \ + } \ + /* Update the reply status */ \ + (r) = _r; \ + } + + /**** COMPONENT STRUCTURE DEFINITION ****/ /* define a component-level API for initializing the component */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/file/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/file/Makefile.am index 30dce46e38e..638fcd6a32a 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/file/Makefile.am +++ 
b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/file/Makefile.am @@ -1,6 +1,6 @@ # # Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -31,6 +31,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_psensor_file_la_SOURCES = $(sources) mca_psensor_file_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_psensor_file_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(component_noinst) libmca_psensor_file_la_SOURCES =$(sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/Makefile.am index df4fe0466a7..95b978415d3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/psensor/heartbeat/Makefile.am @@ -1,7 +1,7 @@ # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # -# Copyright (c) 2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -32,6 +32,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_psensor_heartbeat_la_SOURCES = $(sources) mca_psensor_heartbeat_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_psensor_heartbeat_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(component_noinst) libmca_psensor_heartbeat_la_SOURCES =$(sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/mmap/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/mmap/Makefile.am index 68ba424b719..1483ae5de01 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/mmap/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/mmap/Makefile.am @@ -2,6 +2,7 @@ # # Copyright (c) 2017 Mellanox Technologies, Inc. 
# All rights reserved. +# Copyright (c) 2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -36,6 +37,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_pshmem_mmap_la_SOURCES = $(component_sources) mca_pshmem_mmap_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_pshmem_mmap_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_pshmem_mmap_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c index 0b465340bee..043a68e1388 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/base/ptl_base_sendrecv.c @@ -55,13 +55,6 @@ static void _notify_complete(pmix_status_t status, void *cbdata) PMIX_RELEASE(chain); } -static void _timeout(int sd, short args, void *cbdata) -{ - pmix_server_trkr_t *trk = (pmix_server_trkr_t*)cbdata; - - PMIX_RELEASE(trk); -} - static void lcfn(pmix_status_t status, void *cbdata) { pmix_peer_t *peer = (pmix_peer_t*)cbdata; @@ -76,7 +69,6 @@ void pmix_ptl_base_lost_connection(pmix_peer_t *peer, pmix_status_t err) pmix_ptl_posted_recv_t *rcv; pmix_buffer_t buf; pmix_ptl_hdr_t hdr; - struct timeval tv = {1200, 0}; pmix_proc_t proc; pmix_status_t rc; @@ -114,59 +106,60 @@ void pmix_ptl_base_lost_connection(pmix_peer_t *peer, pmix_status_t err) /* remove it from the list */ pmix_list_remove_item(&trk->local_cbs, &rinfo->super); PMIX_RELEASE(rinfo); - trk->lost_connection = true; // mark that a peer's connection was lost - if (0 == pmix_list_get_size(&trk->local_cbs)) { - /* this tracker is complete, so release it - there - * is nobody waiting for a response */ - pmix_list_remove_item(&pmix_server_globals.collectives, &trk->super); - /* do NOT release the tracker here as the host may - * have a copy they will return later. 
However, they - * might never call back, so set a LONG timeout to - * we avoid a memory leak if they don't */ - pmix_event_evtimer_set(pmix_globals.evbase, &trk->ev, - _timeout, trk); - pmix_event_evtimer_add(&trk->ev, &tv); - trk->event_active = true; - break; + /* if the host has already been called for this tracker, + * then do nothing here - just wait for the host to return + * from the operation */ + if (trk->host_called) { + continue; } - /* if there are other participants waiting for a response, - * we need to let them know that this proc has disappeared - * as otherwise the collective will never complete */ - if (PMIX_FENCENB_CMD == trk->type) { - if (NULL != trk->modexcbfunc) { - /* do NOT release the tracker here as the host may - * have a copy they will return later. However, they - * might never call back, so set a LONG timeout to - * we avoid a memory leak if they don't */ - pmix_event_evtimer_set(pmix_globals.evbase, &trk->ev, - _timeout, trk); - pmix_event_evtimer_add(&trk->ev, &tv); - trk->event_active = true; - trk->modexcbfunc(PMIX_ERR_LOST_CONNECTION_TO_CLIENT, NULL, 0, trk, NULL, NULL); - } - } else if (PMIX_CONNECTNB_CMD == trk->type) { - if (NULL != trk->op_cbfunc) { - /* do NOT release the tracker here as the host may - * have a copy they will return later. However, they - * might never call back, so set a LONG timeout to - * we avoid a memory leak if they don't */ - pmix_event_evtimer_set(pmix_globals.evbase, &trk->ev, - _timeout, trk); - pmix_event_evtimer_add(&trk->ev, &tv); - trk->event_active = true; - trk->op_cbfunc(PMIX_ERR_LOST_CONNECTION_TO_CLIENT, trk); - } - } else if (PMIX_DISCONNECTNB_CMD == trk->type) { - if (NULL != trk->op_cbfunc) { - /* do NOT release the tracker here as the host may - * have a copy they will return later. 
However, they - * might never call back, so set a LONG timeout to - * we avoid a memory leak if they don't */ - pmix_event_evtimer_set(pmix_globals.evbase, &trk->ev, - _timeout, trk); - pmix_event_evtimer_add(&trk->ev, &tv); - trk->event_active = true; - trk->op_cbfunc(PMIX_ERR_LOST_CONNECTION_TO_CLIENT, trk); + if (trk->def_complete && trk->nlocal == pmix_list_get_size(&trk->local_cbs)) { + /* if this is a local-only collective, then resolve it now */ + if (trk->local) { + /* everyone else has called in - we need to let them know + * that this proc has disappeared + * as otherwise the collective will never complete */ + if (PMIX_FENCENB_CMD == trk->type) { + if (NULL != trk->modexcbfunc) { + trk->modexcbfunc(PMIX_ERR_LOST_CONNECTION_TO_CLIENT, NULL, 0, trk, NULL, NULL); + } + } else if (PMIX_CONNECTNB_CMD == trk->type) { + if (NULL != trk->op_cbfunc) { + trk->op_cbfunc(PMIX_ERR_LOST_CONNECTION_TO_CLIENT, trk); + } + } else if (PMIX_DISCONNECTNB_CMD == trk->type) { + if (NULL != trk->op_cbfunc) { + trk->op_cbfunc(PMIX_ERR_LOST_CONNECTION_TO_CLIENT, trk); + } + } + } else { + /* if the host has not been called, then we need to see if + * the collective is locally complete without this lost + * participant. 
If so, then we need to pass the call + * up to the host as otherwise the global collective will hang */ + if (PMIX_FENCENB_CMD == trk->type) { + trk->host_called = true; + rc = pmix_host_server.fence_nb(trk->pcs, trk->npcs, + trk->info, trk->ninfo, + NULL, 0, trk->modexcbfunc, trk); + if (PMIX_SUCCESS != rc) { + pmix_list_remove_item(&pmix_server_globals.collectives, &trk->super); + PMIX_RELEASE(trk); + } + } else if (PMIX_CONNECTNB_CMD == trk->type) { + trk->host_called = true; + rc = pmix_host_server.connect(trk->pcs, trk->npcs, trk->info, trk->ninfo, trk->op_cbfunc, trk); + if (PMIX_SUCCESS != rc) { + pmix_list_remove_item(&pmix_server_globals.collectives, &trk->super); + PMIX_RELEASE(trk); + } + } else if (PMIX_DISCONNECTNB_CMD == trk->type) { + trk->host_called = true; + rc = pmix_host_server.disconnect(trk->pcs, trk->npcs, trk->info, trk->ninfo, trk->op_cbfunc, trk); + if (PMIX_SUCCESS != rc) { + pmix_list_remove_item(&pmix_server_globals.collectives, &trk->super); + PMIX_RELEASE(trk); + } + } } } } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/Makefile.am index 6788aba19c4..0a5b86bfdac 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -44,6 +44,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_ptl_tcp_la_SOURCES = $(component_sources) mca_ptl_tcp_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_ptl_tcp_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_ptl_tcp_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c index e921cd599c7..0252eed51c2 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c @@ -1411,7 +1411,18 @@ static pmix_status_t recv_connect_ack(int sd, uint8_t myflag) pmix_client_globals.myserver->info->pname.rank); /* get the returned status from the security handshake */ - pmix_ptl_base_recv_blocking(sd, (char*)&reply, sizeof(pmix_status_t)); + rc = pmix_ptl_base_recv_blocking(sd, (char*)&u32, sizeof(pmix_status_t)); + if (PMIX_SUCCESS != rc) { + if (sockopt) { + /* return the socket to normal */ + if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &save, sz)) { + return PMIX_ERR_UNREACH; + } + } + return rc; + } + + reply = ntohl(u32); if (PMIX_SUCCESS != reply) { /* see if they want us to do the handshake */ if (PMIX_ERR_READY_FOR_HANDSHAKE == reply) { diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.h b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.h index f5373f65069..5813bc7085c 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2018 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -48,6 +48,7 @@ typedef struct { struct sockaddr_storage connection; char *session_filename; char *nspace_filename; + char *pid_filename; char *system_filename; char *rendezvous_filename; int wait_to_connect; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c index 7f3138d52c6..61eb18ec305 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c @@ -15,7 +15,8 @@ * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2018 IBM Corporation. All rights reserved. + * Copyright (c) 2018-2019 IBM Corporation. All rights reserved. + * Copyright (c) 2019 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -117,6 +118,7 @@ static pmix_status_t setup_fork(const pmix_proc_t *proc, char ***env); .disable_ipv6_family = true, .session_filename = NULL, .nspace_filename = NULL, + .pid_filename = NULL, .system_filename = NULL, .rendezvous_filename = NULL, .wait_to_connect = 4, @@ -297,6 +299,10 @@ pmix_status_t component_close(void) unlink(mca_ptl_tcp_component.nspace_filename); free(mca_ptl_tcp_component.nspace_filename); } + if (NULL != mca_ptl_tcp_component.pid_filename) { + unlink(mca_ptl_tcp_component.pid_filename); + free(mca_ptl_tcp_component.pid_filename); + } if (NULL != mca_ptl_tcp_component.rendezvous_filename) { unlink(mca_ptl_tcp_component.rendezvous_filename); free(mca_ptl_tcp_component.rendezvous_filename); @@ -750,10 +756,10 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, FILE *fp; pid_t mypid; - /* first output to a file based on pid */ + /* first output to a std file */ mypid = getpid(); - if (0 > asprintf(&mca_ptl_tcp_component.session_filename, 
"%s/pmix.%s.tool.%d", - mca_ptl_tcp_component.session_tmpdir, myhost, mypid)) { + if (0 > asprintf(&mca_ptl_tcp_component.session_filename, "%s/pmix.%s.tool", + mca_ptl_tcp_component.session_tmpdir, myhost)) { CLOSE_THE_SOCKET(lt->socket); goto sockerror; } @@ -784,6 +790,40 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, goto sockerror; } + /* now output to a file based on pid */ + mypid = getpid(); + if (0 > asprintf(&mca_ptl_tcp_component.pid_filename, "%s/pmix.%s.tool.%d", + mca_ptl_tcp_component.session_tmpdir, myhost, mypid)) { + CLOSE_THE_SOCKET(lt->socket); + goto sockerror; + } + pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, + "WRITING TOOL FILE %s", + mca_ptl_tcp_component.pid_filename); + fp = fopen(mca_ptl_tcp_component.pid_filename, "w"); + if (NULL == fp) { + pmix_output(0, "Impossible to open the file %s in write mode\n", mca_ptl_tcp_component.pid_filename); + PMIX_ERROR_LOG(PMIX_ERR_FILE_OPEN_FAILURE); + CLOSE_THE_SOCKET(lt->socket); + free(mca_ptl_tcp_component.pid_filename); + mca_ptl_tcp_component.pid_filename = NULL; + goto sockerror; + } + + /* output my URI */ + fprintf(fp, "%s\n", lt->uri); + /* add a flag that indicates we accept v2.1 protocols */ + fprintf(fp, "%s\n", PMIX_VERSION); + fclose(fp); + /* set the file mode */ + if (0 != chmod(mca_ptl_tcp_component.pid_filename, S_IRUSR | S_IWUSR | S_IRGRP)) { + PMIX_ERROR_LOG(PMIX_ERR_FILE_OPEN_FAILURE); + CLOSE_THE_SOCKET(lt->socket); + free(mca_ptl_tcp_component.pid_filename); + mca_ptl_tcp_component.pid_filename = NULL; + goto sockerror; + } + /* now output it into a file based on my nspace */ if (0 > asprintf(&mca_ptl_tcp_component.nspace_filename, "%s/pmix.%s.tool.%s", @@ -957,7 +997,7 @@ static void connection_handler(int sd, short args, void *cbdata) pmix_ptl_hdr_t hdr; pmix_peer_t *peer; pmix_rank_t rank=0; - pmix_status_t rc; + pmix_status_t rc, reply; char *msg, *mg, *version; char *sec, *bfrops, *gds; pmix_bfrop_buffer_type_t bftype; @@ 
-1351,11 +1391,21 @@ static void connection_handler(int sd, short args, void *cbdata) } } if (NULL == nptr) { - /* we don't know this namespace, reject it */ - free(msg); - /* send an error reply to the client */ - rc = PMIX_ERR_NOT_FOUND; - goto error; + /* it is possible that this is a tool inside of + * a job-script as part of a multi-spawn operation. + * Since each tool invocation may have finalized and + * terminated, the tool will appear to "terminate", thus + * causing us to cleanup all references to it, and then + * reappear. So we don't reject this connection request. + * Instead, we create the nspace and rank objects for + * it and let the RM/host decide if this behavior + * is allowed */ + nptr = PMIX_NEW(pmix_namespace_t); + if (NULL == nptr) { + rc = PMIX_ERR_NOMEM; + goto error; + } + nptr->nspace = strdup(nspace); } /* now look for the rank */ info = NULL; @@ -1367,11 +1417,13 @@ static void connection_handler(int sd, short args, void *cbdata) } } if (!found) { - /* rank unknown, reject it */ - free(msg); - /* send an error reply to the client */ - rc = PMIX_ERR_NOT_FOUND; - goto error; + /* see above note about not finding nspace */ + info = PMIX_NEW(pmix_rank_info_t); + info->pname.nspace = strdup(nspace); + info->pname.rank = rank; + info->uid = pnd->uid; + info->gid = pnd->gid; + pmix_list_append(&nptr->ranks, &info->super); } PMIX_RETAIN(info); peer->info = info; @@ -1610,22 +1662,13 @@ static void connection_handler(int sd, short args, void *cbdata) /* validate the connection */ cred.bytes = pnd->cred; cred.size = pnd->len; - PMIX_PSEC_VALIDATE_CONNECTION(rc, peer, NULL, 0, NULL, NULL, &cred); - if (PMIX_SUCCESS != rc) { - pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, - "validation of client connection failed"); - info->proc_cnt--; - pmix_pointer_array_set_item(&pmix_server_globals.clients, peer->index, NULL); - PMIX_RELEASE(peer); - /* send an error reply to the client */ - goto error; - } + 
PMIX_PSEC_VALIDATE_CONNECTION(reply, peer, NULL, 0, NULL, NULL, &cred); pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, - "client connection validated"); + "client connection validated with status=%d", reply); /* tell the client all is good */ - u32 = htonl(PMIX_SUCCESS); + u32 = htonl(reply); if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(uint32_t)))) { PMIX_ERROR_LOG(rc); info->proc_cnt--; @@ -1635,6 +1678,22 @@ static void connection_handler(int sd, short args, void *cbdata) PMIX_RELEASE(pnd); return; } + /* If needed perform the handshake. The macro will update reply */ + PMIX_PSEC_SERVER_HANDSHAKE_IFNEED(reply, peer, NULL, 0, NULL, NULL, &cred); + + /* It is possible that connection validation failed + * We need to reply to the client first and cleanup after */ + if (PMIX_SUCCESS != reply) { + pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, + "validation of client connection failed"); + info->proc_cnt--; + pmix_pointer_array_set_item(&pmix_server_globals.clients, peer->index, NULL); + PMIX_RELEASE(peer); + /* send an error reply to the client */ + goto error; + } + + /* send the client's array index */ u32 = htonl(peer->index); if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(uint32_t)))) { @@ -1697,7 +1756,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) pmix_namespace_t *nptr; pmix_rank_info_t *info; pmix_peer_t *peer; - int rc; + pmix_status_t rc, reply; uint32_t u32; pmix_info_t ginfo; pmix_byte_object_t cred; @@ -1856,8 +1915,23 @@ static void process_cbfunc(int sd, short args, void *cbdata) /* validate the connection */ cred.bytes = pnd->cred; cred.size = pnd->len; - PMIX_PSEC_VALIDATE_CONNECTION(rc, peer, NULL, 0, NULL, NULL, &cred); - if (PMIX_SUCCESS != rc) { + PMIX_PSEC_VALIDATE_CONNECTION(reply, peer, NULL, 0, NULL, NULL, &cred); + /* communicate the result to the other side */ + u32 = htonl(reply); + if (PMIX_SUCCESS != (rc = 
pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(uint32_t)))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(peer); + pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); + PMIX_RELEASE(nptr); // will release the info object + CLOSE_THE_SOCKET(pnd->sd); + goto done; + } + + /* If needed perform the handshake. The macro will update reply */ + PMIX_PSEC_SERVER_HANDSHAKE_IFNEED(reply, peer, NULL, 0, NULL, NULL, &cred); + + /* If verification wasn't successful - stop here */ + if (PMIX_SUCCESS != reply) { pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "validation of tool credentials failed: %s", PMIx_Error_string(rc)); @@ -1880,7 +1954,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) /* probably cannot send an error reply if we are out of memory */ return; } - info->peerid = peer->index; + peer->info->peerid = peer->index; /* start the events for this tool */ pmix_event_assign(&peer->recv_event, pmix_globals.evbase, peer->sd, @@ -1906,8 +1980,8 @@ static void cnct_cbfunc(pmix_status_t status, pmix_setup_caddy_t *cd; pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, - "pmix:tcp:cnct_cbfunc returning %s:%d", - proc->nspace, proc->rank); + "pmix:tcp:cnct_cbfunc returning %s:%d %s", + proc->nspace, proc->rank, PMIx_Error_string(status)); /* need to thread-shift this into our context */ cd = PMIX_NEW(pmix_setup_caddy_t); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/Makefile.am b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/Makefile.am index e6606e2e844..2c91ac37c8d 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/Makefile.am @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2016 Intel, Inc. All rights reserved +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -44,6 +44,9 @@ mcacomponentdir = $(pmixlibdir) mcacomponent_LTLIBRARIES = $(component) mca_ptl_usock_la_SOURCES = $(component_sources) mca_ptl_usock_la_LDFLAGS = -module -avoid-version +if NEED_LIBPMIX +mca_ptl_usock_la_LIBADD = $(top_builddir)/src/libpmix.la +endif noinst_LTLIBRARIES = $(lib) libmca_ptl_usock_la_SOURCES = $(lib_sources) diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c index 7cb073db767..ef33e766f9c 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c @@ -12,10 +12,11 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018 IBM Corporation. All rights reserved. + * Copyright (c) 2019 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -150,6 +151,10 @@ pmix_status_t component_close(void) static int component_query(pmix_mca_base_module_t **module, int *priority) { + if (PMIX_PROC_IS_TOOL(pmix_globals.mypeer)) { + return PMIX_ERR_NOT_SUPPORTED; + } + *module = (pmix_mca_base_module_t*)&pmix_ptl_usock_module; return PMIX_SUCCESS; } @@ -339,7 +344,7 @@ static void connection_handler(int sd, short args, void *cbdata) { pmix_pending_connection_t *pnd = (pmix_pending_connection_t*)cbdata; char *msg, *ptr, *nspace, *version, *sec, *bfrops, *gds; - pmix_status_t rc; + pmix_status_t rc, reply; unsigned int rank; pmix_usock_hdr_t hdr; pmix_namespace_t *nptr, *tmp; @@ -354,6 +359,7 @@ static void connection_handler(int sd, short args, void *cbdata) unsigned int msglen; pmix_info_t ginfo; pmix_byte_object_t cred; + uint32_t u32; /* acquire the object */ PMIX_ACQUIRE_OBJECT(pnd); @@ -687,12 +693,34 @@ static void connection_handler(int sd, short args, void *cbdata) * record it here for future use */ nptr->compat.ptl = &pmix_ptl_usock_module; - /* validate the connection - the macro will send the status result to the client */ - PMIX_PSEC_VALIDATE_CONNECTION(rc, psave, NULL, 0, NULL, 0, &cred); /* now done with the msg */ free(msg); - if (PMIX_SUCCESS != rc) { + /* validate the connection - the macro will send the status result to the client */ + PMIX_PSEC_VALIDATE_CONNECTION(reply, psave, NULL, 0, NULL, 0, &cred); + pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, + "client connection validated with status=%d", reply); + + /* Communicate the result of validation to the client */ + u32 = htonl(reply); + if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&u32, sizeof(uint32_t)))) { + PMIX_ERROR_LOG(rc); + info->proc_cnt--; + PMIX_RELEASE(info); + pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL); + PMIX_RELEASE(psave); + /* error reply was sent by the above macro */ + CLOSE_THE_SOCKET(pnd->sd); + 
PMIX_RELEASE(pnd); + return; + } + + /* If needed perform the handshake. The macro will update reply */ + PMIX_PSEC_SERVER_HANDSHAKE_IFNEED(reply, psave, NULL, 0, NULL, 0, &cred); + + /* It is possible that connection validation failed + * We need to reply to the client first and cleanup after */ + if (PMIX_SUCCESS != reply) { pmix_output_verbose(2, pmix_ptl_base_framework.framework_output, "validation of client credentials failed: %s", PMIx_Error_string(rc)); @@ -706,6 +734,8 @@ static void connection_handler(int sd, short args, void *cbdata) return; } + + /* send the client's array index */ if (PMIX_SUCCESS != (rc = pmix_ptl_base_send_blocking(pnd->sd, (char*)&psave->index, sizeof(int)))) { PMIX_ERROR_LOG(rc); diff --git a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.c b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.c index df0af87c280..a66e4d0a768 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.c +++ b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.c @@ -1,8 +1,10 @@ /* - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Mellanox Technologies, Inc. + * All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -119,7 +121,7 @@ static void stop_progress_engine(pmix_progress_tracker_t *trk) /* break the event loop - this will cause the loop to exit upon completion of any current event */ - pmix_event_base_loopbreak(trk->ev_base); + pmix_event_base_loopexit(trk->ev_base); pmix_thread_join(&trk->engine, NULL); } diff --git a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c index 38b85c0175c..f827018d712 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c @@ -157,6 +157,8 @@ pmix_status_t pmix_server_initialize(void) return PMIX_SUCCESS; } +static pmix_server_module_t myhostserver = {0}; + PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, pmix_info_t info[], size_t ninfo) { @@ -185,7 +187,11 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, "pmix:server init called"); /* setup the function pointers */ - pmix_host_server = *module; + if (NULL == module) { + pmix_host_server = myhostserver; + } else { + pmix_host_server = *module; + } if (NULL != info) { for (n=0; n < ninfo; n++) { @@ -520,6 +526,13 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void) return PMIX_SUCCESS; } +static void opcbfunc(pmix_status_t status, void *cbdata) +{ + pmix_lock_t *lock = (pmix_lock_t*)cbdata; + lock->status = status; + PMIX_WAKEUP_THREAD(lock); +} + static void _register_nspace(int sd, short args, void *cbdata) { pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; @@ -579,9 +592,7 @@ static void _register_nspace(int sd, short args, void *cbdata) cd->info, cd->ninfo); release: - if (NULL != cd->opcbfunc) { - cd->opcbfunc(rc, cd->cbdata); - } + cd->opcbfunc(rc, cd->cbdata); PMIX_RELEASE(cd); } @@ -591,6 +602,8 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_nspace(const pmix_nspace_t nspace pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_setup_caddy_t *cd; + 
pmix_status_t rc; + pmix_lock_t mylock; PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { @@ -610,6 +623,22 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_nspace(const pmix_nspace_t nspace cd->info = info; } + /* if the provided callback is NULL, then substitute + * our own internal cbfunc and block here */ + if (NULL == cbfunc) { + PMIX_CONSTRUCT_LOCK(&mylock); + cd->opcbfunc = opcbfunc; + cd->cbdata = &mylock; + PMIX_THREADSHIFT(cd, _register_nspace); + PMIX_WAIT_THREAD(&mylock); + rc = mylock.status; + PMIX_DESTRUCT_LOCK(&mylock); + if (PMIX_SUCCESS == rc) { + rc = PMIX_OPERATION_SUCCEEDED; + } + return rc; + } + /* we have to push this into our event library to avoid * potential threading issues */ PMIX_THREADSHIFT(cd, _register_nspace); @@ -747,9 +776,7 @@ static void _deregister_nspace(int sd, short args, void *cbdata) } /* release the caller */ - if (NULL != cd->opcbfunc) { - cd->opcbfunc(rc, cd->cbdata); - } + cd->opcbfunc(rc, cd->cbdata); PMIX_RELEASE(cd); } @@ -758,6 +785,7 @@ PMIX_EXPORT void PMIx_server_deregister_nspace(const pmix_nspace_t nspace, void *cbdata) { pmix_setup_caddy_t *cd; + pmix_lock_t mylock; pmix_output_verbose(2, pmix_server_globals.base_output, "pmix:server deregister nspace %s", @@ -778,6 +806,18 @@ PMIX_EXPORT void PMIx_server_deregister_nspace(const pmix_nspace_t nspace, cd->opcbfunc = cbfunc; cd->cbdata = cbdata; + /* if the provided callback is NULL, then substitute + * our own internal cbfunc and block here */ + if (NULL == cbfunc) { + PMIX_CONSTRUCT_LOCK(&mylock); + cd->opcbfunc = opcbfunc; + cd->cbdata = &mylock; + PMIX_THREADSHIFT(cd, _deregister_nspace); + PMIX_WAIT_THREAD(&mylock); + PMIX_DESTRUCT_LOCK(&mylock); + return; + } + /* we have to push this into our event library to avoid * potential threading issues */ PMIX_THREADSHIFT(cd, _deregister_nspace); @@ -1054,9 +1094,7 @@ static void _register_client(int sd, short args, void *cbdata) cleanup: /* let the caller know we are done */ - if (NULL 
!= cd->opcbfunc) { - cd->opcbfunc(rc, cd->cbdata); - } + cd->opcbfunc(rc, cd->cbdata); PMIX_RELEASE(cd); } @@ -1065,6 +1103,8 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc, pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_setup_caddy_t *cd; + pmix_status_t rc; + pmix_lock_t mylock; PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { @@ -1089,6 +1129,22 @@ PMIX_EXPORT pmix_status_t PMIx_server_register_client(const pmix_proc_t *proc, cd->opcbfunc = cbfunc; cd->cbdata = cbdata; + /* if the provided callback is NULL, then substitute + * our own internal cbfunc and block here */ + if (NULL == cbfunc) { + PMIX_CONSTRUCT_LOCK(&mylock); + cd->opcbfunc = opcbfunc; + cd->cbdata = &mylock; + PMIX_THREADSHIFT(cd, _register_client); + PMIX_WAIT_THREAD(&mylock); + rc = mylock.status; + PMIX_DESTRUCT_LOCK(&mylock); + if (PMIX_SUCCESS == rc) { + rc = PMIX_OPERATION_SUCCEEDED; + } + return rc; + } + /* we have to push this into our event library to avoid * potential threading issues */ PMIX_THREADSHIFT(cd, _register_client); @@ -1169,9 +1225,7 @@ static void _deregister_client(int sd, short args, void *cbdata) } cleanup: - if (NULL != cd->opcbfunc) { - cd->opcbfunc(PMIX_SUCCESS, cd->cbdata); - } + cd->opcbfunc(PMIX_SUCCESS, cd->cbdata); PMIX_RELEASE(cd); } @@ -1179,6 +1233,7 @@ PMIX_EXPORT void PMIx_server_deregister_client(const pmix_proc_t *proc, pmix_op_cbfunc_t cbfunc, void *cbdata) { pmix_setup_caddy_t *cd; + pmix_lock_t mylock; PMIX_ACQUIRE_THREAD(&pmix_global_lock); if (pmix_globals.init_cntr <= 0) { @@ -1206,6 +1261,18 @@ PMIX_EXPORT void PMIx_server_deregister_client(const pmix_proc_t *proc, cd->opcbfunc = cbfunc; cd->cbdata = cbdata; + /* if the provided callback is NULL, then substitute + * our own internal cbfunc and block here */ + if (NULL == cbfunc) { + PMIX_CONSTRUCT_LOCK(&mylock); + cd->opcbfunc = opcbfunc; + cd->cbdata = &mylock; + PMIX_THREADSHIFT(cd, _deregister_client); + PMIX_WAIT_THREAD(&mylock); + 
PMIX_DESTRUCT_LOCK(&mylock); + return; + } + /* we have to push this into our event library to avoid * potential threading issues */ PMIX_THREADSHIFT(cd, _deregister_client); @@ -2386,12 +2453,7 @@ static void _mdxcbfunc(int sd, short argc, void *cbdata) xfer.bytes_used = 0; PMIX_DESTRUCT(&xfer); - if (!tracker->lost_connection) { - /* if this tracker has gone thru the "lost_connection" procedure, - * then it has already been removed from the list - otherwise, - * remove it now */ - pmix_list_remove_item(&pmix_server_globals.collectives, &tracker->super); - } + pmix_list_remove_item(&pmix_server_globals.collectives, &tracker->super); PMIX_RELEASE(tracker); PMIX_LIST_DESTRUCT(&nslist); @@ -2644,12 +2706,7 @@ static void _cnct(int sd, short args, void *cbdata) if (NULL != nspaces) { pmix_argv_free(nspaces); } - if (!tracker->lost_connection) { - /* if this tracker has gone thru the "lost_connection" procedure, - * then it has already been removed from the list - otherwise, - * remove it now */ - pmix_list_remove_item(&pmix_server_globals.collectives, &tracker->super); - } + pmix_list_remove_item(&pmix_server_globals.collectives, &tracker->super); PMIX_RELEASE(tracker); /* we are done */ @@ -2726,12 +2783,7 @@ static void _discnct(int sd, short args, void *cbdata) cleanup: /* cleanup the tracker -- the host RM is responsible for * telling us when to remove the nspace from our data */ - if (!tracker->lost_connection) { - /* if this tracker has gone thru the "lost_connection" procedure, - * then it has already been removed from the list - otherwise, - * remove it now */ - pmix_list_remove_item(&pmix_server_globals.collectives, &tracker->super); - } + pmix_list_remove_item(&pmix_server_globals.collectives, &tracker->super); PMIX_RELEASE(tracker); /* we are done */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c index 5e239e0d03d..5f7ad645f86 100644 --- 
a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c @@ -375,6 +375,7 @@ static pmix_server_trkr_t* new_tracker(char *id, pmix_proc_t *procs, bool all_def; pmix_namespace_t *nptr, *ns; pmix_rank_info_t *info; + pmix_rank_t ns_local = 0; pmix_output_verbose(5, pmix_server_globals.base_output, "new_tracker called with %d procs", (int)nprocs); @@ -450,6 +451,7 @@ static pmix_server_trkr_t* new_tracker(char *id, pmix_proc_t *procs, * of the loop */ } /* is this one of my local ranks? */ + ns_local = 0; PMIX_LIST_FOREACH(info, &nptr->ranks, pmix_rank_info_t) { if (procs[i].rank == info->pname.rank || PMIX_RANK_WILDCARD == procs[i].rank) { @@ -457,12 +459,26 @@ static pmix_server_trkr_t* new_tracker(char *id, pmix_proc_t *procs, "adding local proc %s.%d to tracker", info->pname.nspace, info->pname.rank); /* track the count */ - ++trk->nlocal; + ns_local++; if (PMIX_RANK_WILDCARD != procs[i].rank) { break; } } } + + trk->nlocal += ns_local; + if (!ns_local) { + trk->local = false; + } else if (PMIX_RANK_WILDCARD == procs[i].rank) { + /* If proc is a wildcard we need to additionally check + * that all of the processes in the namespace were + * locally found. + * Otherwise this tracker is not local + */ + if (ns_local != nptr->nprocs) { + trk->local = false; + } + } } if (all_def) { trk->def_complete = true; @@ -645,6 +661,37 @@ pmix_status_t pmix_server_fence(pmix_server_caddy_t *cd, pmix_list_get_size(&trk->local_cbs) == trk->nlocal) { pmix_output_verbose(2, pmix_server_globals.base_output, "fence complete"); + /* if this is a purely local fence (i.e., all participants are local), + * then it is done and we notify accordingly */ + if (trk->local) { + /* the modexcbfunc thread-shifts the call prior to processing, + * so it is okay to call it directly from here. 
The switchyard + * will acknowledge successful acceptance of the fence request, + * but the client still requires a return from the callback in + * that scenario, so we leave this caddy on the list of local cbs */ + trk->modexcbfunc(PMIX_SUCCESS, NULL, 0, trk, NULL, NULL); + rc = PMIX_SUCCESS; + goto cleanup; + } + /* this fence involves non-local procs - check if the + * host supports it */ + if (NULL == pmix_host_server.fence_nb) { + rc = PMIX_ERR_NOT_SUPPORTED; + /* clear the caddy from this tracker so it can be + * released upon return - the switchyard will send an + * error to this caller, and so the fence completion + * function doesn't need to do so */ + pmix_list_remove_item(&trk->local_cbs, &cd->super); + cd->trk = NULL; + /* we need to ensure that all other local participants don't + * just hang waiting for the error return, so execute + * the fence completion function - it threadshifts the call + * prior to processing, so it is okay to call it directly + * from here */ + trk->host_called = false; // the host will not be calling us back + trk->modexcbfunc(rc, NULL, 0, trk, NULL, NULL); + goto cleanup; + } /* if the user asked us to collect data, then we have * to provide any locally collected data to the host * server so they can circulate it - only take data @@ -719,18 +766,51 @@ pmix_status_t pmix_server_fence(pmix_server_caddy_t *cd, PMIX_BYTE_OBJECT_DESTRUCT(&bo); // releases the data if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); - PMIX_DESTRUCT(&cb); + PMIX_DESTRUCT(&bucket); + /* clear the caddy from this tracker so it can be + * released upon return - the switchyard will send an + * error to this caller, and so the fence completion + * function doesn't need to do so */ + pmix_list_remove_item(&trk->local_cbs, &cd->super); + cd->trk = NULL; + /* we need to ensure that all other local participants don't + * just hang waiting for the error return, so execute + * the fence completion function - it threadshifts the call + * prior to processing, so it is 
okay to call it directly + * from here */ + trk->modexcbfunc(rc, NULL, 0, trk, NULL, NULL); goto cleanup; } /* now unload the blob and pass it upstairs */ PMIX_UNLOAD_BUFFER(&bucket, data, sz); PMIX_DESTRUCT(&bucket); + trk->host_called = true; rc = pmix_host_server.fence_nb(trk->pcs, trk->npcs, trk->info, trk->ninfo, data, sz, trk->modexcbfunc, trk); - if (PMIX_SUCCESS != rc) { - pmix_list_remove_item(&pmix_server_globals.collectives, &trk->super); - PMIX_RELEASE(trk); + if (PMIX_SUCCESS != rc && PMIX_OPERATION_SUCCEEDED != rc) { + /* clear the caddy from this tracker so it can be + * released upon return - the switchyard will send an + * error to this caller, and so the fence completion + * function doesn't need to do so */ + pmix_list_remove_item(&trk->local_cbs, &cd->super); + cd->trk = NULL; + /* we need to ensure that all other local participants don't + * just hang waiting for the error return, so execute + * the fence completion function - it threadshifts the call + * prior to processing, so it is okay to call it directly + * from here */ + trk->host_called = false; // the host will not be calling us back + trk->modexcbfunc(rc, NULL, 0, trk, NULL, NULL); + } else if (PMIX_OPERATION_SUCCEEDED == rc) { + /* the operation was atomically completed and the host will + * not be calling us back - ensure we notify all participants. 
+ * the modexcbfunc thread-shifts the call prior to processing, + * so it is okay to call it directly from here */ + trk->host_called = false; // the host will not be calling us back + trk->modexcbfunc(PMIX_SUCCESS, NULL, 0, trk, NULL, NULL); + /* ensure that the switchyard doesn't release the caddy */ + rc = PMIX_SUCCESS; } } @@ -1377,11 +1457,31 @@ pmix_status_t pmix_server_disconnect(pmix_server_caddy_t *cd, * across all participants has been completed */ if (trk->def_complete && pmix_list_get_size(&trk->local_cbs) == trk->nlocal) { + trk->host_called = true; rc = pmix_host_server.disconnect(trk->pcs, trk->npcs, trk->info, trk->ninfo, cbfunc, trk); - if (PMIX_SUCCESS != rc) { - /* remove this contributor from the list - they will be notified - * by the switchyard */ + if (PMIX_SUCCESS != rc && PMIX_OPERATION_SUCCEEDED != rc) { + /* clear the caddy from this tracker so it can be + * released upon return - the switchyard will send an + * error to this caller, and so the op completion + * function doesn't need to do so */ pmix_list_remove_item(&trk->local_cbs, &cd->super); + cd->trk = NULL; + /* we need to ensure that all other local participants don't + * just hang waiting for the error return, so execute + * the op completion function - it threadshifts the call + * prior to processing, so it is okay to call it directly + * from here */ + trk->host_called = false; // the host will not be calling us back + cbfunc(rc, trk); + } else if (PMIX_OPERATION_SUCCEEDED == rc) { + /* the operation was atomically completed and the host will + * not be calling us back - ensure we notify all participants. 
+ * the cbfunc thread-shifts the call prior to processing, + * so it is okay to call it directly from here */ + trk->host_called = false; // the host will not be calling us back + cbfunc(PMIX_SUCCESS, trk); + /* ensure that the switchyard doesn't release the caddy */ + rc = PMIX_SUCCESS; } } else { rc = PMIX_SUCCESS; @@ -1526,11 +1626,31 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, * across all participants has been completed */ if (trk->def_complete && pmix_list_get_size(&trk->local_cbs) == trk->nlocal) { + trk->host_called = true; rc = pmix_host_server.connect(trk->pcs, trk->npcs, trk->info, trk->ninfo, cbfunc, trk); - if (PMIX_SUCCESS != rc) { - /* remove this contributor from the list - they will be notified - * by the switchyard */ + if (PMIX_SUCCESS != rc && PMIX_OPERATION_SUCCEEDED != rc) { + /* clear the caddy from this tracker so it can be + * released upon return - the switchyard will send an + * error to this caller, and so the op completion + * function doesn't need to do so */ pmix_list_remove_item(&trk->local_cbs, &cd->super); + cd->trk = NULL; + /* we need to ensure that all other local participants don't + * just hang waiting for the error return, so execute + * the op completion function - it threadshifts the call + * prior to processing, so it is okay to call it directly + * from here */ + trk->host_called = false; // the host will not be calling us back + cbfunc(rc, trk); + } else if (PMIX_OPERATION_SUCCEEDED == rc) { + /* the operation was atomically completed and the host will + * not be calling us back - ensure we notify all participants. 
+ * the cbfunc thread-shifts the call prior to processing, + * so it is okay to call it directly from here */ + trk->host_called = false; // the host will not be calling us back + cbfunc(PMIX_SUCCESS, trk); + /* ensure that the switchyard doesn't release the caddy */ + rc = PMIX_SUCCESS; } } else { rc = PMIX_SUCCESS; @@ -1555,29 +1675,194 @@ pmix_status_t pmix_server_connect(pmix_server_caddy_t *cd, return rc; } +static void _check_cached_events(int sd, short args, void *cbdata) +{ + pmix_setup_caddy_t *scd = (pmix_setup_caddy_t*)cbdata; + pmix_notify_caddy_t *cd; + pmix_range_trkr_t rngtrk; + pmix_proc_t proc; + int i; + size_t k, n; + bool found, matched; + pmix_buffer_t *relay; + pmix_status_t ret = PMIX_SUCCESS; + pmix_cmd_t cmd = PMIX_NOTIFY_CMD; + + /* check if any matching notifications have been cached */ + rngtrk.procs = NULL; + rngtrk.nprocs = 0; + for (i=0; i < pmix_globals.max_events; i++) { + pmix_hotel_knock(&pmix_globals.notifications, i, (void**)&cd); + if (NULL == cd) { + continue; + } + found = false; + if (NULL == scd->codes) { + if (!cd->nondefault) { + /* they registered a default event handler - always matches */ + found = true; + } + } else { + for (k=0; k < scd->ncodes; k++) { + if (scd->codes[k] == cd->status) { + found = true; + break; + } + } + } + if (!found) { + continue; + } + /* check if the affected procs (if given) match those they + * wanted to know about */ + if (!pmix_notify_check_affected(cd->affected, cd->naffected, + scd->procs, scd->nprocs)) { + continue; + } + /* check the range */ + if (NULL == cd->targets) { + rngtrk.procs = &cd->source; + rngtrk.nprocs = 1; + } else { + rngtrk.procs = cd->targets; + rngtrk.nprocs = cd->ntargets; + } + rngtrk.range = cd->range; + PMIX_LOAD_PROCID(&proc, scd->peer->info->pname.nspace, scd->peer->info->pname.rank); + if (!pmix_notify_check_range(&rngtrk, &proc)) { + continue; + } + /* if we were given specific targets, check if this is one */ + found = false; + if (NULL != cd->targets) { + 
matched = false; + for (n=0; n < cd->ntargets; n++) { + /* if the source of the event is the same peer just registered, then ignore it + * as the event notification system will have already locally + * processed it */ + if (PMIX_CHECK_PROCID(&cd->source, &scd->peer->info->pname)) { + continue; + } + if (PMIX_CHECK_PROCID(&scd->peer->info->pname, &cd->targets[n])) { + matched = true; + /* track the number of targets we have left to notify */ + --cd->nleft; + /* if this is the last one, then evict this event + * from the cache */ + if (0 == cd->nleft) { + pmix_hotel_checkout(&pmix_globals.notifications, cd->room); + found = true; // mark that we should release cd + } + break; + } + } + if (!matched) { + /* do not notify this one */ + continue; + } + } + + /* all matches - notify */ + relay = PMIX_NEW(pmix_buffer_t); + if (NULL == relay) { + /* nothing we can do */ + PMIX_ERROR_LOG(PMIX_ERR_NOMEM); + ret = PMIX_ERR_NOMEM; + break; + } + /* pack the info data stored in the event */ + PMIX_BFROPS_PACK(ret, scd->peer, relay, &cmd, 1, PMIX_COMMAND); + if (PMIX_SUCCESS != ret) { + PMIX_ERROR_LOG(ret); + break; + } + PMIX_BFROPS_PACK(ret, scd->peer, relay, &cd->status, 1, PMIX_STATUS); + if (PMIX_SUCCESS != ret) { + PMIX_ERROR_LOG(ret); + break; + } + PMIX_BFROPS_PACK(ret, scd->peer, relay, &cd->source, 1, PMIX_PROC); + if (PMIX_SUCCESS != ret) { + PMIX_ERROR_LOG(ret); + break; + } + PMIX_BFROPS_PACK(ret, scd->peer, relay, &cd->ninfo, 1, PMIX_SIZE); + if (PMIX_SUCCESS != ret) { + PMIX_ERROR_LOG(ret); + break; + } + if (0 < cd->ninfo) { + PMIX_BFROPS_PACK(ret, scd->peer, relay, cd->info, cd->ninfo, PMIX_INFO); + if (PMIX_SUCCESS != ret) { + PMIX_ERROR_LOG(ret); + break; + } + } + PMIX_SERVER_QUEUE_REPLY(ret, scd->peer, 0, relay); + if (PMIX_SUCCESS != ret) { + PMIX_RELEASE(relay); + } + if (found) { + PMIX_RELEASE(cd); + } + } + /* release the caddy */ + if (NULL != scd->codes) { + free(scd->codes); + } + if (NULL != scd->info) { + PMIX_INFO_FREE(scd->info, scd->ninfo); + } 
+ if (NULL != scd->opcbfunc) { + scd->opcbfunc(ret, scd->cbdata); + } + PMIX_RELEASE(scd); +} + +/* provide a callback function for the host when it finishes + * processing the registration */ +static void regevopcbfunc(pmix_status_t status, void *cbdata) +{ + pmix_setup_caddy_t *cd = (pmix_setup_caddy_t*)cbdata; + + /* if the registration succeeded, then check local cache */ + if (PMIX_SUCCESS == status) { + _check_cached_events(0, 0, cd); + return; + } + + /* it didn't succeed, so cleanup and execute the callback + * so we don't hang */ + if (NULL != cd->codes) { + free(cd->codes); + } + if (NULL != cd->info) { + PMIX_INFO_FREE(cd->info, cd->ninfo); + } + if (NULL != cd->opcbfunc) { + cd->opcbfunc(status, cd->cbdata); + } + PMIX_RELEASE(cd); +} + + pmix_status_t pmix_server_register_events(pmix_peer_t *peer, pmix_buffer_t *buf, pmix_op_cbfunc_t cbfunc, void *cbdata) { int32_t cnt; - pmix_status_t rc, ret = PMIX_SUCCESS; + pmix_status_t rc; pmix_status_t *codes = NULL; pmix_info_t *info = NULL; - size_t ninfo=0, ncodes, n, k; + size_t ninfo=0, ncodes, n; pmix_regevents_info_t *reginfo; pmix_peer_events_info_t *prev = NULL; - pmix_notify_caddy_t *cd; pmix_setup_caddy_t *scd; - int i; bool enviro_events = false; - bool found, matched; - pmix_buffer_t *relay; - pmix_cmd_t cmd = PMIX_NOTIFY_CMD; + bool found; pmix_proc_t *affected = NULL; size_t naffected = 0; - pmix_range_trkr_t rngtrk; - pmix_proc_t proc; pmix_output_verbose(2, pmix_server_globals.event_output, "recvd register events for peer %s:%d", @@ -1775,47 +2060,68 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, rc = PMIX_ERR_NOMEM; goto cleanup; } - if (NULL != codes) { - scd->codes = (pmix_status_t*)malloc(ncodes * sizeof(pmix_status_t)); - if (NULL == scd->codes) { - rc = PMIX_ERR_NOMEM; - PMIX_RELEASE(scd); - goto cleanup; - } - memcpy(scd->codes, codes, ncodes * sizeof(pmix_status_t)); - scd->ncodes = ncodes; - } - if (NULL != info) { - PMIX_INFO_CREATE(scd->info, ninfo); - if (NULL == 
scd->info) { - rc = PMIX_ERR_NOMEM; - if (NULL != scd->codes) { - free(scd->codes); - } - PMIX_RELEASE(scd); - goto cleanup; - } - /* copy the info across */ - for (n=0; n < ninfo; n++) { - PMIX_INFO_XFER(&scd->info[n], &info[n]); - } - scd->ninfo = ninfo; - } + PMIX_RETAIN(peer); + scd->peer = peer; + scd->codes = codes; + scd->ncodes = ncodes; + scd->info = info; + scd->ninfo = ninfo; scd->opcbfunc = cbfunc; scd->cbdata = cbdata; - if (PMIX_SUCCESS != (rc = pmix_host_server.register_events(scd->codes, scd->ncodes, scd->info, scd->ninfo, opcbfunc, scd))) { + if (PMIX_SUCCESS == (rc = pmix_host_server.register_events(scd->codes, scd->ncodes, scd->info, scd->ninfo, regevopcbfunc, scd))) { + /* the host will call us back when completed */ pmix_output_verbose(2, pmix_server_globals.event_output, - "server register events: host server reg events returned rc =%d", rc); - if (NULL != scd->codes) { - free(scd->codes); - } - if (NULL != scd->info) { - PMIX_INFO_FREE(scd->info, scd->ninfo); + "server register events: host server processing event registration"); + if (NULL != affected) { + free(affected); } + return rc; + } else if (PMIX_OPERATION_SUCCEEDED == rc) { + /* we need to check cached notifications, but we want to ensure + * that occurs _after_ the client returns from registering the + * event handler in case the event is flagged for do_not_cache. 
+ * Setup an event to fire after we return as that means it will + * occur after we send the registration response back to the client, + * thus guaranteeing that the client will get their registration + * callback prior to delivery of an event notification */ + PMIX_RETAIN(peer); + scd->peer = peer; + scd->procs = affected; + scd->nprocs = naffected; + scd->opcbfunc = NULL; + scd->cbdata = NULL; + PMIX_THREADSHIFT(scd, _check_cached_events); + return rc; + } else { + /* host returned a genuine error and won't be calling the callback function */ + pmix_output_verbose(2, pmix_server_globals.event_output, + "server register events: host server reg events returned rc =%d", rc); PMIX_RELEASE(scd); + goto cleanup; } } else { rc = PMIX_OPERATION_SUCCEEDED; + /* we need to check cached notifications, but we want to ensure + * that occurs _after_ the client returns from registering the + * event handler in case the event is flagged for do_not_cache. + * Setup an event to fire after we return as that means it will + * occur after we send the registration response back to the client, + * thus guaranteeing that the client will get their registration + * callback prior to delivery of an event notification */ + scd = PMIX_NEW(pmix_setup_caddy_t); + PMIX_RETAIN(peer); + scd->peer = peer; + scd->codes = codes; + scd->ncodes = ncodes; + scd->procs = affected; + scd->nprocs = naffected; + scd->opcbfunc = NULL; + scd->cbdata = NULL; + PMIX_THREADSHIFT(scd, _check_cached_events); + if (NULL != info) { + PMIX_INFO_FREE(info, ninfo); + } + return rc; } cleanup: @@ -1824,144 +2130,12 @@ pmix_status_t pmix_server_register_events(pmix_peer_t *peer, if (NULL != info) { PMIX_INFO_FREE(info, ninfo); } - if (PMIX_SUCCESS != rc && PMIX_OPERATION_SUCCEEDED != rc) { - if (NULL != codes) { - free(codes); - } - if (NULL != affected) { - PMIX_PROC_FREE(affected, naffected); - } - return rc; - } - - /* check if any matching notifications have been cached */ - rngtrk.procs = NULL; - rngtrk.nprocs = 0; 
- for (i=0; i < pmix_globals.max_events; i++) { - pmix_hotel_knock(&pmix_globals.notifications, i, (void**)&cd); - if (NULL == cd) { - continue; - } - found = false; - if (NULL == codes) { - if (!cd->nondefault) { - /* they registered a default event handler - always matches */ - found = true; - } - } else { - for (k=0; k < ncodes; k++) { - if (codes[k] == cd->status) { - found = true; - break; - } - } - } - if (!found) { - continue; - } - /* check if the affected procs (if given) match those they - * wanted to know about */ - if (!pmix_notify_check_affected(cd->affected, cd->naffected, - affected, naffected)) { - continue; - } - /* check the range */ - if (NULL == cd->targets) { - rngtrk.procs = &cd->source; - rngtrk.nprocs = 1; - } else { - rngtrk.procs = cd->targets; - rngtrk.nprocs = cd->ntargets; - } - rngtrk.range = cd->range; - PMIX_LOAD_PROCID(&proc, peer->info->pname.nspace, peer->info->pname.rank); - if (!pmix_notify_check_range(&rngtrk, &proc)) { - continue; - } - /* if we were given specific targets, check if this is one */ - found = false; - if (NULL != cd->targets) { - matched = false; - for (n=0; n < cd->ntargets; n++) { - /* if the source of the event is the same peer just registered, then ignore it - * as the event notification system will have already locally - * processed it */ - if (PMIX_CHECK_PROCID(&cd->source, &peer->info->pname)) { - continue; - } - if (PMIX_CHECK_PROCID(&peer->info->pname, &cd->targets[n])) { - matched = true; - /* track the number of targets we have left to notify */ - --cd->nleft; - /* if this is the last one, then evict this event - * from the cache */ - if (0 == cd->nleft) { - pmix_hotel_checkout(&pmix_globals.notifications, cd->room); - found = true; // mark that we should release cd - } - break; - } - } - if (!matched) { - /* do not notify this one */ - continue; - } - } - - /* all matches - notify */ - relay = PMIX_NEW(pmix_buffer_t); - if (NULL == relay) { - /* nothing we can do */ - PMIX_ERROR_LOG(PMIX_ERR_NOMEM); 
- ret = PMIX_ERR_NOMEM; - break; - } - /* pack the info data stored in the event */ - PMIX_BFROPS_PACK(ret, peer, relay, &cmd, 1, PMIX_COMMAND); - if (PMIX_SUCCESS != ret) { - PMIX_ERROR_LOG(ret); - break; - } - PMIX_BFROPS_PACK(ret, peer, relay, &cd->status, 1, PMIX_STATUS); - if (PMIX_SUCCESS != ret) { - PMIX_ERROR_LOG(ret); - break; - } - PMIX_BFROPS_PACK(ret, peer, relay, &cd->source, 1, PMIX_PROC); - if (PMIX_SUCCESS != ret) { - PMIX_ERROR_LOG(ret); - break; - } - PMIX_BFROPS_PACK(ret, peer, relay, &cd->ninfo, 1, PMIX_SIZE); - if (PMIX_SUCCESS != ret) { - PMIX_ERROR_LOG(ret); - break; - } - if (0 < cd->ninfo) { - PMIX_BFROPS_PACK(ret, peer, relay, cd->info, cd->ninfo, PMIX_INFO); - if (PMIX_SUCCESS != ret) { - PMIX_ERROR_LOG(ret); - break; - } - } - PMIX_SERVER_QUEUE_REPLY(ret, peer, 0, relay); - if (PMIX_SUCCESS != ret) { - PMIX_RELEASE(relay); - } - if (found) { - PMIX_RELEASE(cd); - } - } - if (NULL != codes) { free(codes); } if (NULL != affected) { PMIX_PROC_FREE(affected, naffected); } - if (PMIX_SUCCESS != ret) { - rc = ret; - } return rc; } @@ -3318,7 +3492,7 @@ pmix_status_t pmix_server_iofstdin(pmix_peer_t *peer, static void tcon(pmix_server_trkr_t *t) { t->event_active = false; - t->lost_connection = false; + t->host_called = false; t->id = NULL; memset(t->pname.nspace, 0, PMIX_MAX_NSLEN+1); t->pname.rank = PMIX_RANK_UNDEF; @@ -3426,11 +3600,17 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_setup_caddy_t, static void ncon(pmix_notify_caddy_t *p) { - struct timespec tp; - PMIX_CONSTRUCT_LOCK(&p->lock); - clock_gettime(CLOCK_MONOTONIC, &tp); +#if defined(__linux__) && OPAL_HAVE_CLOCK_GETTIME + struct timespec tp; + (void) clock_gettime(CLOCK_MONOTONIC, &tp); p->ts = tp.tv_sec; +#else + /* Fall back to gettimeofday() if we have nothing else */ + struct timeval tv; + gettimeofday(&tv, NULL); + p->ts = tv.tv_sec; +#endif p->room = -1; memset(p->source.nspace, 0, PMIX_MAX_NSLEN+1); p->source.rank = PMIX_RANK_UNDEF; diff --git 
a/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c b/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c index effa1190ff2..d2b9c9acbe5 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c +++ b/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c @@ -260,9 +260,7 @@ static void job_data(struct pmix_peer_t *pr, PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, pmix_info_t info[], size_t ninfo) { - pmix_kval_t *kptr; pmix_status_t rc; - char hostname[PMIX_MAX_NSLEN]; char *evar, *nspace = NULL; pmix_rank_t rank = PMIX_RANK_UNDEF; bool gdsfound, do_not_connect = false; @@ -738,314 +736,39 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } + /* quick check to see if we got something back. If this + * is a launcher that is being executed multiple times + * in a job-script, then the original registration data + * will have been deleted after the first invocation. In + * such a case, we simply regenerate it locally as it is + * well-known */ + pmix_cb_t cb; + PMIX_CONSTRUCT(&cb, pmix_cb_t); + pmix_strncpy(wildcard.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); + wildcard.rank = PMIX_RANK_WILDCARD; + cb.proc = &wildcard; + cb.copy = true; + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); + PMIX_DESTRUCT(&cb); + if (PMIX_SUCCESS != rc) { + pmix_output_verbose(5, pmix_client_globals.get_output, + "pmix:tool:client data not found in internal storage"); + rc = pmix_tool_init_info(); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE_THREAD(&pmix_global_lock); + return rc; + } + } } else { /* now finish the initialization by filling our local * datastore with typical job-related info. 
No point * in having the server generate these as we are * obviously a singleton, and so the values are well-known */ - pmix_strncpy(wildcard.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); - wildcard.rank = pmix_globals.myid.rank; - - /* the jobid is just our nspace */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_JOBID); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_STRING; - kptr->value->data.string = strdup(pmix_globals.myid.nspace); - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* our rank */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_RANK); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_INT; - kptr->value->data.integer = 0; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &pmix_globals.myid, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* nproc offset */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_NPROC_OFFSET); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 0; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* node size */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_NODE_SIZE); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 1; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain 
accounting - - /* local peers */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_LOCAL_PEERS); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_STRING; - kptr->value->data.string = strdup("0"); - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* local leader */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_LOCALLDR); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 0; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* universe size */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_UNIV_SIZE); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 1; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* job size - we are our very own job, so we have no peers */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_JOB_SIZE); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 1; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* local size - only us in our job */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_LOCAL_SIZE); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - 
kptr->value->data.uint32 = 1; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* max procs - since we are a self-started tool, there is no - * allocation within which we can grow ourselves */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_MAX_PROCS); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 1; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* app number */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_APPNUM); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 0; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &pmix_globals.myid, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* app leader */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_APPLDR); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 0; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &pmix_globals.myid, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* app rank */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_APP_RANK); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 0; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &pmix_globals.myid, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - 
PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* global rank */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_GLOBAL_RANK); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT32; - kptr->value->data.uint32 = 0; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &pmix_globals.myid, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* local rank - we are alone in our job */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_LOCAL_RANK); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_UINT16; - kptr->value->data.uint32 = 0; - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &pmix_globals.myid, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* we cannot know the node rank as we don't know what - * other processes are executing on this node - so - * we'll add that info to the server-tool handshake - * and load it from there */ - - /* hostname */ - gethostname(hostname, PMIX_MAX_NSLEN); - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_HOSTNAME); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_STRING; - kptr->value->data.string = strdup(hostname); - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &pmix_globals.myid, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* we cannot know the RM's nodeid for this host, so - * we'll add that info to the server-tool handshake - * and load it from there */ - - /* the nodemap is simply our hostname as there is no - * regex to generate */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = 
strdup(PMIX_NODE_MAP); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_STRING; - kptr->value->data.string = strdup(hostname); - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); - if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_RELEASE_THREAD(&pmix_global_lock); - return rc; - } - PMIX_RELEASE(kptr); // maintain accounting - - /* likewise, the proc map is just our rank as we are - * the only proc in this job */ - kptr = PMIX_NEW(pmix_kval_t); - kptr->key = strdup(PMIX_PROC_MAP); - PMIX_VALUE_CREATE(kptr->value, 1); - kptr->value->type = PMIX_STRING; - kptr->value->data.string = strdup("0"); - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, - &wildcard, - PMIX_INTERNAL, kptr); + rc = pmix_tool_init_info(); if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); PMIX_RELEASE_THREAD(&pmix_global_lock); return rc; } - PMIX_RELEASE(kptr); // maintain accounting } PMIX_RELEASE_THREAD(&pmix_global_lock); @@ -1061,6 +784,307 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc, return rc; } +pmix_status_t pmix_tool_init_info(void) +{ + pmix_kval_t *kptr; + pmix_status_t rc; + pmix_proc_t wildcard; + char hostname[PMIX_MAX_NSLEN]; + + pmix_strncpy(wildcard.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); + wildcard.rank = pmix_globals.myid.rank; + + /* the jobid is just our nspace */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_JOBID); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_STRING; + kptr->value->data.string = strdup(pmix_globals.myid.nspace); + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* our rank */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_RANK); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_INT; + kptr->value->data.integer = 0; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + 
PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* nproc offset */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_NPROC_OFFSET); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 0; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* node size */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_NODE_SIZE); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 1; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* local peers */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_LOCAL_PEERS); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_STRING; + kptr->value->data.string = strdup("0"); + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* local leader */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_LOCALLDR); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 0; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* universe size */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_UNIV_SIZE); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 1; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if 
(PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* job size - we are our very own job, so we have no peers */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_JOB_SIZE); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 1; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* local size - only us in our job */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_LOCAL_SIZE); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 1; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* max procs - since we are a self-started tool, there is no + * allocation within which we can grow ourselves */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_MAX_PROCS); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 1; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* app number */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_APPNUM); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 0; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* app leader */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_APPLDR); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + 
kptr->value->data.uint32 = 0; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* app rank */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_APP_RANK); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 0; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* global rank */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_GLOBAL_RANK); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT32; + kptr->value->data.uint32 = 0; + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* local rank - we are alone in our job */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_LOCAL_RANK); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_UINT16; + kptr->value->data.uint16 = 0; /* union member must match the declared PMIX_UINT16 type */ + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* we cannot know the node rank as we don't know what + * other processes are executing on this node - so + * we'll add that info to the server-tool handshake + * and load it from there */ + + /* hostname */ + if (NULL != pmix_globals.hostname) { + pmix_strncpy(hostname, pmix_globals.hostname, PMIX_MAX_NSLEN); + } else { + gethostname(hostname, PMIX_MAX_NSLEN); + } + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_HOSTNAME); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_STRING; + kptr->value->data.string =
strdup(hostname); + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &pmix_globals.myid, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* we cannot know the RM's nodeid for this host, so + * we'll add that info to the server-tool handshake + * and load it from there */ + + /* the nodemap is simply our hostname as there is no + * regex to generate */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_NODE_MAP); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_STRING; + kptr->value->data.string = strdup(hostname); + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + /* likewise, the proc map is just our rank as we are + * the only proc in this job */ + kptr = PMIX_NEW(pmix_kval_t); + kptr->key = strdup(PMIX_PROC_MAP); + PMIX_VALUE_CREATE(kptr->value, 1); + kptr->value->type = PMIX_STRING; + kptr->value->data.string = strdup("0"); + PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, + &wildcard, + PMIX_INTERNAL, kptr); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_RELEASE(kptr); // maintain accounting + + return PMIX_SUCCESS; +} + + typedef struct { pmix_lock_t lock; pmix_event_t ev; diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/output.c b/opal/mca/pmix/pmix3x/pmix/src/util/output.c index 8648f1a0b72..cf73f507008 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/output.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/output.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -332,15 +332,10 @@ PMIX_EXPORT void pmix_output(int output_id, const char *format, ...) /* * Send a message to a stream if the verbose level is high enough */ - PMIX_EXPORT void pmix_output_verbose(int level, int output_id, const char *format, ...) + PMIX_EXPORT bool pmix_output_check_verbosity(int level, int output_id) { - if (output_id >= 0 && output_id < PMIX_OUTPUT_MAX_STREAMS && - info[output_id].ldi_verbose_level >= level) { - va_list arglist; - va_start(arglist, format); - output(output_id, format, arglist); - va_end(arglist); - } + return (output_id >= 0 && output_id < PMIX_OUTPUT_MAX_STREAMS && + info[output_id].ldi_verbose_level >= level); } diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/output.h b/opal/mca/pmix/pmix3x/pmix/src/util/output.h index c3274bab7d0..5e8fa677b5e 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/output.h +++ b/opal/mca/pmix/pmix3x/pmix/src/util/output.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -414,12 +414,13 @@ PMIX_EXPORT void pmix_output(int output_id, const char *format, ...) __pmix_attr * * @see pmix_output_set_verbosity() */ -PMIX_EXPORT void pmix_output_verbose(int verbose_level, int output_id, - const char *format, ...) __pmix_attribute_format__(__printf__, 3, 4); +#define pmix_output_verbose(verbose_level, output_id, ...) 
\ + if (pmix_output_check_verbosity(verbose_level, output_id)) { \ + pmix_output(output_id, __VA_ARGS__); \ + } + +PMIX_EXPORT bool pmix_output_check_verbosity(int verbose_level, int output_id); -/** -* Same as pmix_output_verbose(), but takes a va_list form of varargs. -*/ PMIX_EXPORT void pmix_output_vverbose(int verbose_level, int output_id, const char *format, va_list ap) __pmix_attribute_format__(__printf__, 3, 0); diff --git a/opal/mca/pmix/pmix3x/pmix/test/Makefile.am b/opal/mca/pmix/pmix3x/pmix/test/Makefile.am index 3b4ee7214ca..c886e3b1fb3 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/test/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. # Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2018 Research Organization for Information Science # and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ @@ -33,9 +33,61 @@ headers = test_common.h cli_stages.h server_callbacks.h utils.h test_fence.h \ AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_builddir)/src/api -noinst_SCRIPTS = pmix_client_otheruser.sh +noinst_SCRIPTS = pmix_client_otheruser.sh \ + run_tests00.pl \ + run_tests01.pl \ + run_tests02.pl \ + run_tests03.pl \ + run_tests04.pl \ + run_tests05.pl \ + run_tests06.pl \ + run_tests07.pl \ + run_tests08.pl \ + run_tests09.pl \ + run_tests10.pl \ + run_tests11.pl \ + run_tests12.pl \ + run_tests13.pl \ + run_tests14.pl \ + run_tests15.pl + noinst_PROGRAMS = +######################### +# Support for "make check" + +check_PROGRAMS = \ + pmix_test \ + pmix_client \ + pmix_regex + +if WANT_PMI_BACKWARD +check_PROGRAMS += \ + pmi_client \ + pmi2_client +endif + +TESTS = \ + run_tests00.pl \ + run_tests01.pl \ + run_tests02.pl \ + run_tests03.pl \ + run_tests04.pl \ + run_tests05.pl \ + run_tests06.pl \ + run_tests07.pl \ + run_tests08.pl \ + run_tests09.pl \ + run_tests10.pl \ + run_tests11.pl \ + run_tests12.pl \ + run_tests13.pl \ + run_tests14.pl \ + run_tests15.pl + + +########################## + if WANT_PMI_BACKWARD noinst_PROGRAMS += pmi_client pmi2_client endif diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests00.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests00.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests00.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. 
All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. +my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir .
"/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the trailing ".pl" extension +$testnum = substr($testnum, -2); +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests01.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests01.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests01.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components.
+my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the trailing ".pl" extension +$testnum = substr($testnum, -2); +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests02.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests02.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests02.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc.
All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. +my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir .
"/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the trailing ".pl" extension +$testnum = substr($testnum, -2); +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests03.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests03.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests03.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components.
+my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the trailing ".pl" extension +$testnum = substr($testnum, -2); +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests04.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests04.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests04.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc.
All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. +my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir .
"/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the trailing ".pl" extension +$testnum = substr($testnum, -2); +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests05.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests05.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests05.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components.
+my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the trailing ".pl" extension +$testnum = substr($testnum, -2); +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests06.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests06.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests06.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc.
All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. +my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir .
"/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the trailing ".pl" extension +$testnum = substr($testnum, -2); +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests07.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests07.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests07.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components.
+my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the trailing ".pl" extension +$testnum = substr($testnum, -2); +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests08.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests08.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests08.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc.
All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. +my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir .
"/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/\.pl$//; # strip only the trailing ".pl" extension +$testnum = substr($testnum, -2); +$test = $tests[$testnum]; # scalar element, not a one-element slice + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests09.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests09.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests09.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_base_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components.
+my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests10.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests10.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests10.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. 
All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_bast_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. +my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . 
"/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests11.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests11.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests11.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_bast_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests12.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests12.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests12.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. 
All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_bast_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. +my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . 
"/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests13.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests13.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests13.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_bast_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests14.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests14.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests14.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. 
All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_bast_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. +my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . 
"/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/run_tests15.pl.in b/opal/mca/pmix/pmix3x/pmix/test/run_tests15.pl.in new file mode 100755 index 00000000000..fb139c9ce7d --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/test/run_tests15.pl.in @@ -0,0 +1,73 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2019 Intel, Inc. +# +# Copyright (c) 2019 Cisco Systems, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow + +use strict; + +my @tests = ("-n 4 --ns-dist 3:1 --fence \"[db | 0:0-2;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:0]\"", + "-n 4 --ns-dist 3:1 --fence \"[db | 0:;1:]\"", + "-n 4 --ns-dist 3:1 --fence \"[0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[b | 0:]\"", + "-n 4 --ns-dist 3:1 --fence \"[d | 0:]\" --noise \"[0:0,1]\"", + "-n 4 --job-fence -c", + "-n 4 --job-fence", + "-n 2 --test-publish", + "-n 2 --test-spawn", + "-n 2 --test-connect", + "-n 5 --test-resolve-peers --ns-dist \"1:2:2\"", + "-n 5 --test-replace 100:0,1,10,50,99", + "-n 5 --test-internal 10", + "-s 2 -n 2 --job-fence", + "-s 2 -n 2 --job-fence -c"); + +my $test; +my $cmd; +my $output; +my $status = 0; +my $testnum; + +# We are running tests against the build tree (vs. the installation +# tree). Autogen gives us a full list of all possible component +# directories in PMIX_COMPONENT_LIBRARY_PATHS. Iterate through each +# of those directories: 1) to see if there is actually a component +# built in there, and 2) to turn it into an absolute path name. Then +# put the new list in the "mca_bast_component_path" MCA parameter env +# variable so that the MCA base knows where to find all the +# components. 
+my @myfullpaths; +my $mybuilddir = "@PMIX_BUILT_TEST_PREFIX@"; +my $mypathstr = "@PMIX_COMPONENT_LIBRARY_PATHS@"; +my @splitstr = split(':', $mypathstr); +foreach my $path (@splitstr) { + # Note that the component is actually built in the ".libs" + # subdirectory. If the component wasn't built, that subdirectory + # will not exist, so don't save it. + my $fullpath = $mybuilddir . "/" . $path . "/.libs"; + push(@myfullpaths, $fullpath) + if (-d $fullpath); +} +my $mymcapaths = join(":", @myfullpaths); +$ENV{'PMIX_MCA_mca_base_component_path'} = $mymcapaths; + +my $wdir = $mybuilddir . "/test"; +chdir $wdir; + +$testnum = $0; +$testnum =~ s/.pl//; +$testnum = substr($testnum, -2); +$test = @tests[$testnum]; + +$cmd = "./pmix_test " . $test . " 2>&1"; +print $cmd . "\n"; +$output = `$cmd`; +print $output . "\n"; +print "CODE $?\n"; +$status = "$?"; + +exit($status >> 8); diff --git a/opal/mca/pmix/pmix3x/pmix/test/test_common.h b/opal/mca/pmix/pmix3x/pmix/test/test_common.h index fd25f8bdf87..10b180e6598 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/test_common.h +++ b/opal/mca/pmix/pmix3x/pmix/test/test_common.h @@ -262,7 +262,7 @@ typedef struct { TEST_VERBOSE(("%s:%d want to get from %s:%d key %s", my_nspace, my_rank, ns, r, key)); \ if (blocking) { \ if (PMIX_SUCCESS != (rc = PMIx_Get(&foobar, key, NULL, 0, &val))) { \ - if( !( rc == PMIX_ERR_NOT_FOUND && ok_notfnd ) ){ \ + if( !( (rc == PMIX_ERR_NOT_FOUND || rc == PMIX_ERR_PROC_ENTRY_NOT_FOUND) && ok_notfnd ) ){ \ TEST_ERROR(("%s:%d: PMIx_Get failed: %d from %s:%d, key %s", my_nspace, my_rank, rc, ns, r, key)); \ } \ rc = PMIX_ERROR; \ @@ -289,7 +289,7 @@ typedef struct { } \ if (PMIX_SUCCESS == rc) { \ if( PMIX_SUCCESS != cbdata.status ){ \ - if( !( cbdata.status == PMIX_ERR_NOT_FOUND && ok_notfnd ) ){ \ + if( !( (cbdata.status == PMIX_ERR_NOT_FOUND || cbdata.status == PMIX_ERR_PROC_ENTRY_NOT_FOUND) && ok_notfnd ) ){ \ TEST_ERROR(("%s:%d: PMIx_Get_nb failed: %d from %s:%d, key=%s", \ my_nspace, my_rank, rc, 
my_nspace, r)); \ } \ diff --git a/opal/mca/pmix/pmix3x/pmix/test/test_fence.c b/opal/mca/pmix/pmix3x/pmix/test/test_fence.c index 9ad4cf786df..a33d9618b71 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/test_fence.c +++ b/opal/mca/pmix/pmix3x/pmix/test/test_fence.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ @@ -384,7 +384,7 @@ int test_job_fence(test_params params, char *my_nspace, pmix_rank_t my_rank) if( local ){ GET(int, (12340+j), my_nspace, i+params.base_rank, 100, j, 0, 0, 0); if (PMIX_SUCCESS != rc) { - TEST_ERROR(("%s:%d: PMIx_Get failed: %d", my_nspace, my_rank, rc)); + TEST_ERROR(("%s:%d: PMIx_Get failed: %s", my_nspace, my_rank, PMIx_Error_string(rc))); return PMIX_ERROR; } @@ -423,9 +423,10 @@ int test_job_fence(test_params params, char *my_nspace, pmix_rank_t my_rank) my_nspace, my_rank)); return PMIX_ERROR; } - if (PMIX_ERR_NOT_FOUND != rc) { - TEST_ERROR(("%s:%d [ERROR]: PMIx_Get returned %d instead of not_found", - my_nspace, my_rank, rc)); + if (PMIX_ERR_NOT_FOUND != rc && PMIX_ERR_PROC_ENTRY_NOT_FOUND != rc) { + TEST_ERROR(("%s:%d [ERROR]: PMIx_Get returned %s instead of not_found", + my_nspace, my_rank, PMIx_Error_string(rc))); + return PMIX_ERROR; } if (NULL != val) { TEST_ERROR(("%s:%d [ERROR]: PMIx_Get did not return NULL value", my_nspace, my_rank)); diff --git a/opal/mca/pmix/pmix3x/pmix/test/test_server.c b/opal/mca/pmix/pmix3x/pmix/test/test_server.c index 3627dade912..426014149ef 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/test_server.c +++ b/opal/mca/pmix/pmix3x/pmix/test/test_server.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2018 Mellanox Technologies, Inc. * All rights reserved. 
* Copyright (c) 2016 Research Organization for Information Science @@ -93,12 +93,36 @@ static void release_cb(pmix_status_t status, void *cbdata) *ptr = 0; } -static void set_namespace(int local_size, int univ_size, char *ranks, char *name) +static void fill_seq_ranks_array(size_t nprocs, int base_rank, char **ranks) +{ + uint32_t i; + int len = 0, max_ranks_len; + if (0 >= nprocs) { + return; + } + max_ranks_len = nprocs * (MAX_DIGIT_LEN+1); + *ranks = (char*) malloc(max_ranks_len); + for (i = 0; i < nprocs; i++) { + len += snprintf(*ranks + len, max_ranks_len-len-1, "%d", i+base_rank); + if (i != nprocs-1) { + len += snprintf(*ranks + len, max_ranks_len-len-1, "%c", ','); + } + } + if (len >= max_ranks_len-1) { + free(*ranks); + *ranks = NULL; + TEST_ERROR(("Not enough allocated space for global ranks array.")); + } +} + +static void set_namespace(int local_size, int univ_size, + int base_rank, char *name) { size_t ninfo; pmix_info_t *info; ninfo = 8; char *regex, *ppn; + char *ranks = NULL; PMIX_INFO_CREATE(info, ninfo); pmix_strncpy(info[0].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); @@ -113,19 +137,31 @@ static void set_namespace(int local_size, int univ_size, char *ranks, char *name info[2].value.type = PMIX_UINT32; info[2].value.data.uint32 = local_size; + /* generate the array of local peers */ + fill_seq_ranks_array(local_size, base_rank, &ranks); + if (NULL == ranks) { + return; + } pmix_strncpy(info[3].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN); info[3].value.type = PMIX_STRING; info[3].value.data.string = strdup(ranks); + free(ranks); PMIx_generate_regex(NODE_NAME, &regex); pmix_strncpy(info[4].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); info[4].value.type = PMIX_STRING; - info[4].value.data.string = regex; + info[4].value.data.string = strdup(regex); + /* generate the global proc map */ + fill_seq_ranks_array(univ_size, 0, &ranks); + if (NULL == ranks) { + return; + } PMIx_generate_ppn(ranks, &ppn); + free(ranks); pmix_strncpy(info[5].key, PMIX_PROC_MAP, 
PMIX_MAX_KEYLEN); info[5].value.type = PMIX_STRING; - info[5].value.data.string = ppn; + info[5].value.data.string = strdup(ppn); pmix_strncpy(info[6].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN); info[6].value.type = PMIX_UINT32; @@ -143,28 +179,6 @@ static void set_namespace(int local_size, int univ_size, char *ranks, char *name PMIX_INFO_FREE(info, ninfo); } -static void fill_seq_ranks_array(size_t nprocs, int base_rank, char **ranks) -{ - uint32_t i; - int len = 0, max_ranks_len; - if (0 >= nprocs) { - return; - } - max_ranks_len = nprocs * (MAX_DIGIT_LEN+1); - *ranks = (char*) malloc(max_ranks_len); - for (i = 0; i < nprocs; i++) { - len += snprintf(*ranks + len, max_ranks_len-len-1, "%d", i+base_rank); - if (i != nprocs-1) { - len += snprintf(*ranks + len, max_ranks_len-len-1, "%c", ','); - } - } - if (len >= max_ranks_len-1) { - free(*ranks); - *ranks = NULL; - TEST_ERROR(("Not enough allocated space for global ranks array.")); - } -} - static void server_unpack_procs(char *buf, size_t size) { char *ptr = buf; @@ -878,14 +892,8 @@ int server_launch_clients(int local_size, int univ_size, int base_rank, univ_size)); TEST_VERBOSE(("Setting job info")); - fill_seq_ranks_array(local_size, base_rank, &ranks); - if (NULL == ranks) { - PMIx_server_finalize(); - TEST_ERROR(("fill_seq_ranks_array failed")); - return PMIX_ERROR; - } (void)snprintf(proc.nspace, PMIX_MAX_NSLEN, "%s-%d", TEST_NAMESPACE, num_ns); - set_namespace(local_size, univ_size, ranks, proc.nspace); + set_namespace(local_size, univ_size, base_rank, proc.nspace); if (NULL != ranks) { free(ranks); } @@ -909,17 +917,18 @@ int server_launch_clients(int local_size, int univ_size, int base_rank, /* fork/exec the test */ for (n = 0; n < local_size; n++) { proc.rank = base_rank + rank_counter; - if (PMIX_SUCCESS != (rc = PMIx_server_setup_fork(&proc, client_env))) {//n - TEST_ERROR(("Server fork setup failed with error %d", rc)); + rc = PMIx_server_register_client(&proc, myuid, mygid, NULL, NULL, NULL); + if 
(PMIX_SUCCESS != rc && PMIX_OPERATION_SUCCEEDED != rc) { + TEST_ERROR(("Server register client failed with error %d", rc)); PMIx_server_finalize(); cli_kill_all(); - return rc; + return 0; } - if (PMIX_SUCCESS != (rc = PMIx_server_register_client(&proc, myuid, mygid, NULL, NULL, NULL))) {//n + if (PMIX_SUCCESS != (rc = PMIx_server_setup_fork(&proc, client_env))) {//n TEST_ERROR(("Server fork setup failed with error %d", rc)); PMIx_server_finalize(); cli_kill_all(); - return 0; + return rc; } cli_info[cli_counter].pid = fork(); From 9d0adbc6bc7d8028a859d4062bce1ef1d4083e2f Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 26 Jun 2019 09:31:43 -0700 Subject: [PATCH 30/94] Update to track 32-bit support commit Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix3x/pmix/configure.ac | 37 +++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/opal/mca/pmix/pmix3x/pmix/configure.ac b/opal/mca/pmix/pmix3x/pmix/configure.ac index dcea50ac001..53c0d64441f 100644 --- a/opal/mca/pmix/pmix3x/pmix/configure.ac +++ b/opal/mca/pmix/pmix3x/pmix/configure.ac @@ -44,13 +44,27 @@ m4_include([config/autogen_found_items.m4]) AC_INIT([pmix], [m4_normalize(esyscmd([config/pmix_get_version.sh VERSION --tarball]))], - [http://pmix.github.io/master], [pmix]) + [https://github.com/pmix/pmix/issues], [pmix]) AC_PREREQ(2.69) AC_CONFIG_AUX_DIR(./config) # Note that this directory must *exactly* match what was specified via # -I in ACLOCAL_AMFLAGS in the top-level Makefile.am. AC_CONFIG_MACRO_DIR(./config) +# autotools expects to perform tests without interference +# from user-provided CFLAGS, particularly -Werror flags. 
+# Search for them here and cache any we find +PMIX_CFLAGS_cache= +PMIX_CFLAGS_pass= +for val in $CFLAGS; do + if echo "$val" | grep -q -e "-W"; then + PMIX_CFLAGS_cache="$PMIX_CFLAGS_cache $val"; + else + PMIX_CFLAGS_pass="$PMIX_CFLAGS_pass $val"; + fi +done +CFLAGS=$PMIX_CFLAGS_pass + PMIX_CAPTURE_CONFIGURE_CLI([PMIX_CONFIGURE_CLI]) # Get our platform support file. This has to be done very, very early @@ -208,7 +222,17 @@ AS_IF([test -z "$CC_FOR_BUILD"],[ AC_SUBST([CC_FOR_BUILD], [$CC]) ]) +# restore any user-provided Werror flags +AS_IF([test ! -z "$PMIX_CFLAGS_cache"], [CFLAGS="$CFLAGS $PMIX_CFLAGS_cache"]) + +# Delay setting pickyness until here so we +# don't break configure code tests +#if test "$WANT_PICKY_COMPILER" = "1"; then +# CFLAGS="$CFLAGS -Wall -Wextra -Werror" +#fi + # Cleanup duplicate flags +PMIX_FLAGS_UNIQ(CFLAGS) PMIX_FLAGS_UNIQ(CPPFLAGS) PMIX_FLAGS_UNIQ(LDFLAGS) PMIX_FLAGS_UNIQ(LIBS) @@ -235,6 +259,17 @@ AC_MSG_RESULT([$LDFLAGS]) AC_MSG_CHECKING([final LIBS]) AC_MSG_RESULT([$LIBS]) +#################################################################### +# -Werror for CI scripts +#################################################################### + +AC_ARG_ENABLE(werror, + AC_HELP_STRING([--enable-werror], + [Treat compiler warnings as errors]), +[ + CFLAGS="$CFLAGS -Werror" +]) + #################################################################### # Version information #################################################################### From 1d0e0557b9fa382e52da574751f93a740f907683 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 2 Jul 2019 08:56:20 -0700 Subject: [PATCH 31/94] v4.0.x: Update PMIx to official v3.1.3 release Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix3x/pmix/NEWS | 3 ++- opal/mca/pmix/pmix3x/pmix/VERSION | 6 +++--- opal/mca/pmix/pmix3x/pmix/config/pmix.m4 | 6 +++--- opal/mca/pmix/pmix3x/pmix/configure.ac | 16 ++-------------- opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec | 2 +- 
.../pmix/pmix3x/pmix/contrib/whitespace-purge.sh | 4 ++-- .../pmix3x/pmix/src/mca/pnet/opa/configure.m4 | 4 ++-- 7 files changed, 15 insertions(+), 26 deletions(-) diff --git a/opal/mca/pmix/pmix3x/pmix/NEWS b/opal/mca/pmix/pmix3x/pmix/NEWS index dd478a9a87e..6445b18275d 100644 --- a/opal/mca/pmix/pmix3x/pmix/NEWS +++ b/opal/mca/pmix/pmix3x/pmix/NEWS @@ -21,7 +21,7 @@ example, a bug might be fixed in the master, and then moved to multiple release branches. -3.1.3 -- TBD +3.1.3 -- 2 July 2019 ---------------------- - PR #1096: Restore PMIX_NUM_SLOTS for backward compatibility - PR #1106: Automatically generate PMIX_NUMERIC_VERSION @@ -64,6 +64,7 @@ multiple release branches. - PR #1311: Work around memory bug in older gcc compilers - PR #1321: Provide memory op hooks in user-facing macros - PR #1329: Add -fPIC to static builds +- PR #1340: Do not use '==' in m4 test statements 3.1.2 -- 24 Jan 2019 diff --git a/opal/mca/pmix/pmix3x/pmix/VERSION b/opal/mca/pmix/pmix3x/pmix/VERSION index 98c143b2677..13511b92a55 100644 --- a/opal/mca/pmix/pmix3x/pmix/VERSION +++ b/opal/mca/pmix/pmix3x/pmix/VERSION @@ -23,14 +23,14 @@ release=3 # The only requirement is that it must be entirely printable ASCII # characters and have no white space. -greek=rc4 +greek= # If repo_rev is empty, then the repository version number will be # obtained during "make dist" via the "git describe --tags --always" # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=git5e6ec324 +repo_rev=gitc10fd1d4 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Jun 25, 2019" +date="Jul 02, 2019" # The shared library version of each of PMIx's public libraries. 
# These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 index a90e23b9795..e0585256c49 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 @@ -191,7 +191,7 @@ AC_DEFUN([PMIX_SETUP_CORE],[ [Link the output PMIx library to this extra lib (used in embedded mode)])) AC_MSG_CHECKING([for extra lib]) AS_IF([test ! -z "$with_pmix_extra_lib"], - [AS_IF([test "$with_pmix_extra_lib" == "yes" || test "$with_pmix_extra_lib" == "no"], + [AS_IF([test "$with_pmix_extra_lib" = "yes" || test "$with_pmix_extra_lib" = "no"], [AC_MSG_RESULT([ERROR]) AC_MSG_WARN([Invalid value for --with-extra-pmix-lib:]) AC_MSG_WARN([ $with_pmix_extra_lib]) @@ -209,7 +209,7 @@ AC_DEFUN([PMIX_SETUP_CORE],[ [Link any embedded components/tools that require it to the provided libtool lib (used in embedded mode)])) AC_MSG_CHECKING([for extra ltlib]) AS_IF([test ! -z "$with_pmix_extra_ltlib"], - [AS_IF([test "$with_pmix_extra_ltlib" == "yes" || test "$with_pmix_extra_ltlib" == "no"], + [AS_IF([test "$with_pmix_extra_ltlib" = "yes" || test "$with_pmix_extra_ltlib" = "no"], [AC_MSG_RESULT([ERROR]) AC_MSG_WARN([Invalid value for --with-pmix-extra-ltlib:]) AC_MSG_WARN([ $with_pmix_extra_ltlib]) @@ -1184,7 +1184,7 @@ AC_MSG_CHECKING([if want to support dlopen of non-global namespaces]) AC_ARG_ENABLE([nonglobal-dlopen], AC_HELP_STRING([--enable-nonglobal-dlopen], [enable non-global dlopen (default: enabled)])) -if test "$enable_nonglobal_dlopen" == "no"; then +if test "$enable_nonglobal_dlopen" = "no"; then AC_MSG_RESULT([no]) pmix_need_libpmix=0 else diff --git a/opal/mca/pmix/pmix3x/pmix/configure.ac b/opal/mca/pmix/pmix3x/pmix/configure.ac index 53c0d64441f..3cbd07e8914 100644 --- a/opal/mca/pmix/pmix3x/pmix/configure.ac +++ b/opal/mca/pmix/pmix3x/pmix/configure.ac @@ -44,7 +44,7 @@ m4_include([config/autogen_found_items.m4]) AC_INIT([pmix], 
[m4_normalize(esyscmd([config/pmix_get_version.sh VERSION --tarball]))], - [https://github.com/pmix/pmix/issues], [pmix]) + [http://pmix.github.io/master], [pmix]) AC_PREREQ(2.69) AC_CONFIG_AUX_DIR(./config) # Note that this directory must *exactly* match what was specified via @@ -171,7 +171,7 @@ LT_PREREQ([2.2.6]) pmix_enable_shared="$enable_shared" pmix_enable_static="$enable_static" -AS_IF([test ! -z "$enable_static" && test "$enable_static" == "yes"], +AS_IF([test ! -z "$enable_static" && test "$enable_static" = "yes"], [CFLAGS="$CFLAGS -fPIC"]) AM_ENABLE_SHARED @@ -232,7 +232,6 @@ AS_IF([test ! -z "$PMIX_CFLAGS_cache"], [CFLAGS="$CFLAGS $PMIX_CFLAGS_cache"]) #fi # Cleanup duplicate flags -PMIX_FLAGS_UNIQ(CFLAGS) PMIX_FLAGS_UNIQ(CPPFLAGS) PMIX_FLAGS_UNIQ(LDFLAGS) PMIX_FLAGS_UNIQ(LIBS) @@ -259,17 +258,6 @@ AC_MSG_RESULT([$LDFLAGS]) AC_MSG_CHECKING([final LIBS]) AC_MSG_RESULT([$LIBS]) -#################################################################### -# -Werror for CI scripts -#################################################################### - -AC_ARG_ENABLE(werror, - AC_HELP_STRING([--enable-werror], - [Treat compiler warnings as errors]), -[ - CFLAGS="$CFLAGS -Werror" -]) - #################################################################### # Version information #################################################################### diff --git a/opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec b/opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec index 44b3810a926..6a47f054807 100644 --- a/opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec +++ b/opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec @@ -192,7 +192,7 @@ Summary: An extended/exascale implementation of PMI Name: %{?_name:%{_name}}%{!?_name:pmix} -Version: 3.1.3rc4 +Version: 3.1.3 Release: 1%{?dist} License: BSD Group: Development/Libraries diff --git a/opal/mca/pmix/pmix3x/pmix/contrib/whitespace-purge.sh b/opal/mca/pmix/pmix3x/pmix/contrib/whitespace-purge.sh index 9c9d8fe909e..905796bc1ef 100755 --- 
a/opal/mca/pmix/pmix3x/pmix/contrib/whitespace-purge.sh +++ b/opal/mca/pmix/pmix3x/pmix/contrib/whitespace-purge.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Copyright (c) 2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015-2019 Intel, Inc. All rights reserved. # Copyright (c) 2015 Los Alamos National Security, LLC. All rights # reserved # Copyright (c) 2015 Cisco Systems, Inc. @@ -18,7 +18,7 @@ for file in $(git ls-files) ; do # skip sym links, pdfs, etc. If any other file types should be # skipped add the check here. type=$(file -b --mime-type -h $file) - if test ${type::4} == "text" ; then + if test ${type::4} = "text" ; then # Eliminate whitespace at the end of lines perl -pi -e 's/\s*$/\n/' $file fi diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/configure.m4 b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/configure.m4 index d822ffaf74e..f613cba102d 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/configure.m4 +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/configure.m4 @@ -12,7 +12,7 @@ # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Sandia National Laboratories. All rights reserved. -# Copyright (c) 2014-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -46,7 +46,7 @@ AC_DEFUN([MCA_pmix_pnet_opa_CONFIG],[ pmix_check_opamgt_dir= AC_MSG_CHECKING([if opamgt requested]) - AS_IF([test "$with_opamgt" == "no"], + AS_IF([test "$with_opamgt" = "no"], [AC_MSG_RESULT([no]) pmix_check_opamgt_happy=no], [AC_MSG_RESULT([yes]) From b6da090090efc66bef3a0278e528ed7f777c2dac Mon Sep 17 00:00:00 2001 From: "Nysal Jan K.A" Date: Wed, 3 Jul 2019 14:33:01 +0530 Subject: [PATCH 32/94] pml/ucx: Fix the max tag and context id values Signed-off-by: Nysal Jan K.A (cherry picked from commit fe4ef147f81b2ac56661175005de6c330eace690) --- ompi/mca/pml/ucx/pml_ucx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ompi/mca/pml/ucx/pml_ucx.c b/ompi/mca/pml/ucx/pml_ucx.c index 348dae8e1a6..ffb7d618343 100644 --- a/ompi/mca/pml/ucx/pml_ucx.c +++ b/ompi/mca/pml/ucx/pml_ucx.c @@ -72,8 +72,8 @@ mca_pml_ucx_module_t ompi_pml_ucx = { .pml_mrecv = mca_pml_ucx_mrecv, .pml_dump = mca_pml_ucx_dump, .pml_ft_event = NULL, - .pml_max_contextid = 1ul << (PML_UCX_CONTEXT_BITS), - .pml_max_tag = 1ul << (PML_UCX_TAG_BITS - 1) + .pml_max_contextid = (1ul << (PML_UCX_CONTEXT_BITS)) - 1, + .pml_max_tag = (1ul << (PML_UCX_TAG_BITS - 1)) - 1 }, .ucp_context = NULL, .ucp_worker = NULL From 667fe3f3f3a0e9abe382e2e8a550609a3de630c3 Mon Sep 17 00:00:00 2001 From: Orivej Desh Date: Thu, 4 Jul 2019 20:24:50 +0000 Subject: [PATCH 33/94] Fix oob_tcp tcp_component_close segfault with active listeners oob_tcp in non-HNP mode shares libevent event_base with oob_base [1]. orte_oob_base_close calls: (1) oob_tcp component_shutdown, then (2) opal_progress_thread_finalize, then (3) oob_tcp tcp_component_close [2]. opal_progress_thread_finalize calls tracker_destructor [3] that frees the event_base [4]. If any oob_tcp event listeners are active at this time, oob_tcp will crash trying to delete them at [5] [6]. 
This change moves oob_tcp event listener cleanup from component_close to component_shutdown so that it happens before the event_base is freed. [1] https://github.com/open-mpi/ompi/blob/v4.0.1/orte/mca/oob/tcp/oob_tcp_listener.c#L160 [2] https://github.com/open-mpi/ompi/blob/v4.0.1/orte/mca/oob/base/oob_base_frame.c#L95 [3] https://github.com/open-mpi/ompi/blob/v4.0.1/opal/runtime/opal_progress_threads.c#L232 [4] https://github.com/open-mpi/ompi/blob/v4.0.1/opal/runtime/opal_progress_threads.c#L65 [5] https://github.com/open-mpi/ompi/blob/v4.0.1/orte/mca/oob/tcp/oob_tcp_component.c#L192 [6] https://github.com/open-mpi/ompi/blob/v4.0.1/orte/mca/oob/tcp/oob_tcp_listener.c#L955 Signed-off-by: Orivej Desh (cherry picked from commit 78b7e342bd26f493547f750dac842252e7a15143) --- orte/mca/oob/tcp/oob_tcp_component.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index 2843ce9cd3c..244c799631e 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -188,9 +188,6 @@ static int tcp_component_open(void) */ static int tcp_component_close(void) { - /* cleanup listen event list */ - OPAL_LIST_DESTRUCT(&mca_oob_tcp_component.listeners); - OBJ_DESTRUCT(&mca_oob_tcp_component.peers); if (NULL != mca_oob_tcp_component.ipv4conns) { @@ -748,6 +745,9 @@ static void component_shutdown(void) (void **) &peer, node, &node); } + /* cleanup listen event list */ + OPAL_LIST_DESTRUCT(&mca_oob_tcp_component.listeners); + opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s TCP SHUTDOWN done", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); From 9499dcfe41c6c2d261d1bc456277a76b689acb72 Mon Sep 17 00:00:00 2001 From: Aurelien Bouteiller Date: Fri, 26 Jan 2018 10:11:21 -0500 Subject: [PATCH 34/94] Manage errors in NBC collective ops Signed-off-by: Aurelien Bouteiller Correctly bubble up errors in NBC collective operations Signed-off-by: Aurelien 
Bouteiller The error field of requests needs to be rearmed at start, not at create Signed-off-by: Aurelien Bouteiller (cherry picked from commit open-mpi/ompi@65660e5999fb7ed422536b79e1092520bed96694) --- ompi/mca/coll/libnbc/nbc.c | 39 ++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/ompi/mca/coll/libnbc/nbc.c b/ompi/mca/coll/libnbc/nbc.c index 039e6d4c9b0..54236c25a19 100644 --- a/ompi/mca/coll/libnbc/nbc.c +++ b/ompi/mca/coll/libnbc/nbc.c @@ -3,7 +3,7 @@ * Copyright (c) 2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University + * Copyright (c) 2013-2018 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2006 The Technical University of Chemnitz. All @@ -335,8 +335,14 @@ int NBC_Progress(NBC_Handle *handle) { while (handle->req_count) { ompi_request_t *subreq = handle->req_array[handle->req_count - 1]; if (REQUEST_COMPLETE(subreq)) { - ompi_request_free(&subreq); + if(OPAL_UNLIKELY( OMPI_SUCCESS != subreq->req_status.MPI_ERROR )) { + NBC_Error ("MPI Error in NBC subrequest %p : %d", subreq, subreq->req_status.MPI_ERROR); + /* copy the error code from the underlying request and let the + * round finish */ + handle->super.req_status.MPI_ERROR = subreq->req_status.MPI_ERROR; + } handle->req_count--; + ompi_request_free(&subreq); } else { flag = false; break; @@ -349,6 +355,26 @@ int NBC_Progress(NBC_Handle *handle) { /* a round is finished */ if (flag) { + /* reset handle for next round */ + if (NULL != handle->req_array) { + /* free request array */ + free (handle->req_array); + handle->req_array = NULL; + } + + handle->req_count = 0; + + /* previous round had an error */ + if (OPAL_UNLIKELY(OMPI_SUCCESS != handle->super.req_status.MPI_ERROR)) { + res = handle->super.req_status.MPI_ERROR; + 
NBC_Error("NBC_Progress: an error %d was found during schedule %p at row-offset %li - aborting the schedule\n", res, handle->schedule, handle->row_offset); + handle->nbc_complete = true; + if (!handle->super.req_persistent) { + NBC_Free(handle); + } + return res; + } + /* adjust delim to start of current round */ NBC_DEBUG(5, "NBC_Progress: going in schedule %p to row-offset: %li\n", handle->schedule, handle->row_offset); delim = handle->schedule->data + handle->row_offset; @@ -358,14 +384,6 @@ int NBC_Progress(NBC_Handle *handle) { /* adjust delim to end of current round -> delimiter */ delim = delim + size; - if (NULL != handle->req_array) { - /* free request array */ - free (handle->req_array); - handle->req_array = NULL; - } - - handle->req_count = 0; - if (*delim == 0) { /* this was the last round - we're done */ NBC_DEBUG(5, "NBC_Progress last round finished - we're done\n"); @@ -638,6 +656,7 @@ int NBC_Start(NBC_Handle *handle) { /* kick off first round */ handle->super.req_state = OMPI_REQUEST_ACTIVE; + handle->super.req_status.MPI_ERROR = OMPI_SUCCESS; res = NBC_Start_round(handle); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; From c9e4240e70d6e5c1186f7ba2090b8f5bc1c9dc2b Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Tue, 9 Apr 2019 15:11:07 +0900 Subject: [PATCH 35/94] mpi: retain operation and datatype in non blocking collectives MPI standard states a user MPI_Op and/or user MPI_Datatype can be free'd after a call to a non blocking collective and before the non-blocking collective completes. Retain user (only) MPI_Op and MPI_Datatype when the non blocking call is invoked, and set a request callback so they are free'd when the MPI_Request completes. 
Thanks Thomas Ponweiser for reporting this Fixes open-mpi/ompi#2151 Fixes open-mpi/ompi#1304 Signed-off-by: Gilles Gouaillardet (cherry picked from commit open-mpi/ompi@0fe756d4166eecf2f0ee2598da690c69a7c824c4) --- ompi/mca/coll/base/coll_base_util.c | 189 +++++++++++++++++- ompi/mca/coll/base/coll_base_util.h | 47 ++++- ompi/mca/coll/libnbc/coll_libnbc.h | 14 +- ompi/mca/coll/libnbc/coll_libnbc_component.c | 30 +-- ompi/mca/coll/libnbc/nbc.c | 20 +- ompi/mpi/c/iallgather.c | 8 +- ompi/mpi/c/iallgatherv.c | 8 +- ompi/mpi/c/iallreduce.c | 10 +- ompi/mpi/c/ialltoall.c | 8 +- ompi/mpi/c/ialltoallv.c | 8 +- ompi/mpi/c/ialltoallw.c | 8 +- ompi/mpi/c/ibcast.c | 13 +- ompi/mpi/c/iexscan.c | 10 +- ompi/mpi/c/igather.c | 24 ++- ompi/mpi/c/igatherv.c | 22 +- ompi/mpi/c/ineighbor_allgather.c | 8 +- ompi/mpi/c/ineighbor_allgatherv.c | 8 +- ompi/mpi/c/ineighbor_alltoall.c | 8 +- ompi/mpi/c/ineighbor_alltoallv.c | 4 + ompi/mpi/c/ineighbor_alltoallw.c | 4 + ompi/mpi/c/ireduce.c | 10 +- ompi/mpi/c/ireduce_scatter.c | 10 +- ompi/mpi/c/ireduce_scatter_block.c | 10 +- ompi/mpi/c/iscan.c | 10 +- ompi/mpi/c/iscatter.c | 24 ++- ompi/mpi/c/iscatterv.c | 24 ++- ompi/mpiext/pcollreq/c/allgather_init.c | 8 +- ompi/mpiext/pcollreq/c/allgatherv_init.c | 8 +- ompi/mpiext/pcollreq/c/allreduce_init.c | 10 +- ompi/mpiext/pcollreq/c/alltoall_init.c | 8 +- ompi/mpiext/pcollreq/c/alltoallv_init.c | 8 +- ompi/mpiext/pcollreq/c/alltoallw_init.c | 8 +- ompi/mpiext/pcollreq/c/bcast_init.c | 13 +- ompi/mpiext/pcollreq/c/exscan_init.c | 10 +- ompi/mpiext/pcollreq/c/gather_init.c | 24 ++- ompi/mpiext/pcollreq/c/gatherv_init.c | 24 ++- .../pcollreq/c/neighbor_allgather_init.c | 8 +- .../pcollreq/c/neighbor_allgatherv_init.c | 8 +- .../pcollreq/c/neighbor_alltoall_init.c | 8 +- .../pcollreq/c/neighbor_alltoallv_init.c | 8 +- .../pcollreq/c/neighbor_alltoallw_init.c | 8 +- ompi/mpiext/pcollreq/c/reduce_init.c | 10 +- .../pcollreq/c/reduce_scatter_block_init.c | 10 +- 
ompi/mpiext/pcollreq/c/reduce_scatter_init.c | 10 +- ompi/mpiext/pcollreq/c/scan_init.c | 10 +- ompi/mpiext/pcollreq/c/scatter_init.c | 24 ++- ompi/mpiext/pcollreq/c/scatterv_init.c | 24 ++- 47 files changed, 649 insertions(+), 139 deletions(-) diff --git a/ompi/mca/coll/base/coll_base_util.c b/ompi/mca/coll/base/coll_base_util.c index 6187098598f..57fe14bad20 100644 --- a/ompi/mca/coll/base/coll_base_util.c +++ b/ompi/mca/coll/base/coll_base_util.c @@ -9,8 +9,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -26,6 +26,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/base/coll_tags.h" #include "ompi/mca/coll/base/coll_base_functions.h" +#include "ompi/mca/topo/base/base.h" #include "ompi/mca/pml/pml.h" #include "coll_base_util.h" @@ -103,3 +104,187 @@ int ompi_rounddown(int num, int factor) num /= factor; return num * factor; /* floor(num / factor) * factor */ } + +static void release_objs_callback(struct ompi_coll_base_nbc_request_t *request) { + if (NULL != request->data.objs.objs[0]) { + OBJ_RELEASE(request->data.objs.objs[0]); + } + if (NULL != request->data.objs.objs[1]) { + OBJ_RELEASE(request->data.objs.objs[1]); + } +} + +static int complete_objs_callback(struct ompi_request_t *req) { + struct ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; + int rc = OMPI_SUCCESS; + assert (NULL != request); + if (NULL != request->cb.req_complete_cb) { + rc = request->cb.req_complete_cb(request->req_complete_cb_data); + } + release_objs_callback(request); + return rc; +} + +static int free_objs_callback(struct ompi_request_t **rptr) 
{ + struct ompi_coll_base_nbc_request_t *request = *(ompi_coll_base_nbc_request_t **)rptr; + int rc = OMPI_SUCCESS; + if (NULL != request->cb.req_free) { + rc = request->cb.req_free(rptr); + } + release_objs_callback(request); + return rc; +} + +int ompi_coll_base_retain_op( ompi_request_t *req, ompi_op_t *op, + ompi_datatype_t *type) { + ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; + bool retain = false; + if (!ompi_op_is_intrinsic(op)) { + OBJ_RETAIN(op); + request->data.op.op = op; + retain = true; + } + if (!ompi_datatype_is_predefined(type)) { + OBJ_RETAIN(type); + request->data.op.datatype = type; + retain = true; + } + if (OPAL_UNLIKELY(retain)) { + /* We need to consider two cases : + * - non blocking collectives: + * the objects can be released when MPI_Wait() completes + * and we use the req_complete_cb callback + * - persistent non blocking collectives: + * the objects can only be released when the request is freed + * (e.g. MPI_Request_free() completes) and we use req_free callback + */ + if (req->req_persistent) { + request->cb.req_free = req->req_free; + req->req_free = free_objs_callback; + } else { + request->cb.req_complete_cb = req->req_complete_cb; + request->req_complete_cb_data = req->req_complete_cb_data; + req->req_complete_cb = complete_objs_callback; + req->req_complete_cb_data = request; + } + } + return OMPI_SUCCESS; +} + +int ompi_coll_base_retain_datatypes( ompi_request_t *req, ompi_datatype_t *stype, + ompi_datatype_t *rtype) { + ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; + bool retain = false; + if (NULL != stype && !ompi_datatype_is_predefined(stype)) { + OBJ_RETAIN(stype); + request->data.types.stype = stype; + retain = true; + } + if (NULL != rtype && !ompi_datatype_is_predefined(rtype)) { + OBJ_RETAIN(rtype); + request->data.types.rtype = rtype; + retain = true; + } + if (OPAL_UNLIKELY(retain)) { + if (req->req_persistent) { + request->cb.req_free = req->req_free; + 
req->req_free = free_objs_callback; + } else { + request->cb.req_complete_cb = req->req_complete_cb; + request->req_complete_cb_data = req->req_complete_cb_data; + req->req_complete_cb = complete_objs_callback; + req->req_complete_cb_data = request; + } + } + return OMPI_SUCCESS; +} + +static void release_vecs_callback(ompi_coll_base_nbc_request_t *request) { + ompi_communicator_t *comm = request->super.req_mpi_object.comm; + int scount, rcount; + if (OMPI_COMM_IS_TOPO(comm)) { + (void)mca_topo_base_neighbor_count (comm, &rcount, &scount); + } else { + scount = rcount = OMPI_COMM_IS_INTER(comm)?ompi_comm_remote_size(comm):ompi_comm_size(comm); + } + for (int i=0; i<scount; i++) { + if (NULL != request->data.vecs.stypes && NULL != request->data.vecs.stypes[i]) { + OMPI_DATATYPE_RELEASE(request->data.vecs.stypes[i]); + } + } + for (int i=0; i<rcount; i++) { + if (NULL != request->data.vecs.rtypes && NULL != request->data.vecs.rtypes[i]) { + OMPI_DATATYPE_RELEASE(request->data.vecs.rtypes[i]); + } + } +} + +static int complete_vecs_callback(struct ompi_request_t *req) { + ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; + int rc = OMPI_SUCCESS; + assert (NULL != request); + if (NULL != request->cb.req_complete_cb) { + rc = request->cb.req_complete_cb(request->req_complete_cb_data); + } + release_vecs_callback(request); + return rc; +} + +static int free_vecs_callback(struct ompi_request_t **rptr) { + struct ompi_coll_base_nbc_request_t *request = *(ompi_coll_base_nbc_request_t **)rptr; + int rc = OMPI_SUCCESS; + if (NULL != request->cb.req_free) { + rc = request->cb.req_free(rptr); + } + release_vecs_callback(request); + return rc; +} + +int ompi_coll_base_retain_datatypes_w( ompi_request_t *req, + ompi_datatype_t *stypes[], ompi_datatype_t *rtypes[]) { + ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; + bool retain = false; + ompi_communicator_t *comm = request->super.req_mpi_object.comm; + int scount, rcount; + if (OMPI_COMM_IS_TOPO(comm)) { + (void)mca_topo_base_neighbor_count (comm,
&rcount, &scount); + } else { + scount = rcount = OMPI_COMM_IS_INTER(comm)?ompi_comm_remote_size(comm):ompi_comm_size(comm); + } + + for (int i=0; i<scount; i++) { + if (NULL != stypes && NULL != stypes[i] && !ompi_datatype_is_predefined(stypes[i])) { + OBJ_RETAIN(stypes[i]); + retain = true; + } + } + for (int i=0; i<rcount; i++) { + if (NULL != rtypes && NULL != rtypes[i] && !ompi_datatype_is_predefined(rtypes[i])) { + OBJ_RETAIN(rtypes[i]); + retain = true; + } + } + if (OPAL_UNLIKELY(retain)) { + request->data.vecs.stypes = stypes; + request->data.vecs.rtypes = rtypes; + if (req->req_persistent) { + request->cb.req_free = req->req_free; + req->req_free = free_vecs_callback; + } else { + request->cb.req_complete_cb = req->req_complete_cb; + request->req_complete_cb_data = req->req_complete_cb_data; + req->req_complete_cb = complete_vecs_callback; + req->req_complete_cb_data = request; + } + } + return OMPI_SUCCESS; +} + +static void nbc_req_cons(ompi_coll_base_nbc_request_t *req) { + req->cb.req_complete_cb = NULL; + req->req_complete_cb_data = NULL; + req->data.objs.objs[0] = NULL; + req->data.objs.objs[1] = NULL; +} + +OBJ_CLASS_INSTANCE(ompi_coll_base_nbc_request_t, ompi_request_t, nbc_req_cons, NULL); diff --git a/ompi/mca/coll/base/coll_base_util.h b/ompi/mca/coll/base/coll_base_util.h index 8306b8fe83d..a5b80161240 100644 --- a/ompi/mca/coll/base/coll_base_util.h +++ b/ompi/mca/coll/base/coll_base_util.h @@ -9,8 +9,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -27,10 +27,41 @@ #include "ompi/mca/mca.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/request/request.h" +#include "ompi/op/op.h" #include "ompi/mca/pml/pml.h" BEGIN_C_DECLS +struct ompi_coll_base_nbc_request_t { + ompi_request_t super; + union { + ompi_request_complete_fn_t req_complete_cb; + ompi_request_free_fn_t req_free; + } cb; + void *req_complete_cb_data; + union { + struct { + ompi_op_t *op; + ompi_datatype_t *datatype; + } op; + struct { + ompi_datatype_t *stype; + ompi_datatype_t *rtype; + } types; + struct { + opal_object_t *objs[2]; + } objs; + struct { + ompi_datatype_t **stypes; + ompi_datatype_t **rtypes; + } vecs; + } data; +}; + +OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_coll_base_nbc_request_t); + +typedef struct ompi_coll_base_nbc_request_t ompi_coll_base_nbc_request_t; + /** * A MPI_like function doing a send and a receive simultaneously. * If one of the communications results in a zero-byte message the @@ -84,5 +115,17 @@ unsigned int ompi_mirror_perm(unsigned int x, int nbits); */ int ompi_rounddown(int num, int factor); +int ompi_coll_base_retain_op( ompi_request_t *request, + ompi_op_t *op, + ompi_datatype_t *type); + +int ompi_coll_base_retain_datatypes( ompi_request_t *request, + ompi_datatype_t *stype, + ompi_datatype_t *rtype); + +int ompi_coll_base_retain_datatypes_w( ompi_request_t *request, + ompi_datatype_t *stypes[], + ompi_datatype_t *rtypes[]); + END_C_DECLS #endif /* MCA_COLL_BASE_UTIL_EXPORT_H */ diff --git a/ompi/mca/coll/libnbc/coll_libnbc.h b/ompi/mca/coll/libnbc/coll_libnbc.h index 967a7794257..17abf86f2ab 100644 --- a/ompi/mca/coll/libnbc/coll_libnbc.h +++ b/ompi/mca/coll/libnbc/coll_libnbc.h @@ -13,8 +13,8 @@ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Research Organization for Information Science - * and Technology (RIST). 
All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ @@ -28,7 +28,7 @@ #define MCA_COLL_LIBNBC_EXPORT_H #include "ompi/mca/coll/coll.h" -#include "ompi/request/request.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "opal/sys/atomic.h" BEGIN_C_DECLS @@ -114,7 +114,7 @@ typedef struct NBC_Schedule NBC_Schedule; OBJ_CLASS_DECLARATION(NBC_Schedule); struct ompi_coll_libnbc_request_t { - ompi_request_t super; + ompi_coll_base_nbc_request_t super; MPI_Comm comm; long row_offset; bool nbc_complete; /* status in libnbc level */ @@ -138,13 +138,13 @@ typedef ompi_coll_libnbc_request_t NBC_Handle; opal_free_list_item_t *item; \ item = opal_free_list_wait (&mca_coll_libnbc_component.requests); \ req = (ompi_coll_libnbc_request_t*) item; \ - OMPI_REQUEST_INIT(&req->super, persistent); \ - req->super.req_mpi_object.comm = comm; \ + OMPI_REQUEST_INIT(&req->super.super, persistent); \ + req->super.super.req_mpi_object.comm = comm; \ } while (0) #define OMPI_COLL_LIBNBC_REQUEST_RETURN(req) \ do { \ - OMPI_REQUEST_FINI(&(req)->super); \ + OMPI_REQUEST_FINI(&(req)->super.super); \ opal_free_list_return (&mca_coll_libnbc_component.requests, \ (opal_free_list_item_t*) (req)); \ } while (0) diff --git a/ompi/mca/coll/libnbc/coll_libnbc_component.c b/ompi/mca/coll/libnbc/coll_libnbc_component.c index bf4960d9235..c5b1656385d 100644 --- a/ompi/mca/coll/libnbc/coll_libnbc_component.c +++ b/ompi/mca/coll/libnbc/coll_libnbc_component.c @@ -13,8 +13,8 @@ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
+ * Copyright (c) 2016-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * Copyright (c) 2017 Ian Bradley Morgan and Anthony Skjellum. All * rights reserved. @@ -328,21 +328,21 @@ ompi_coll_libnbc_progress(void) /* done, remove and complete */ OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock); opal_list_remove_item(&mca_coll_libnbc_component.active_requests, - &request->super.super.super); + &request->super.super.super.super); OPAL_THREAD_UNLOCK(&mca_coll_libnbc_component.lock); if( OMPI_SUCCESS == res || NBC_OK == res || NBC_SUCCESS == res ) { - request->super.req_status.MPI_ERROR = OMPI_SUCCESS; + request->super.super.req_status.MPI_ERROR = OMPI_SUCCESS; } else { - request->super.req_status.MPI_ERROR = res; + request->super.super.req_status.MPI_ERROR = res; } - if(request->super.req_persistent) { + if(request->super.super.req_persistent) { /* reset for the next communication */ request->row_offset = 0; } - if(!request->super.req_persistent || !REQUEST_COMPLETE(&request->super)) { - ompi_request_complete(&request->super, true); + if(!request->super.super.req_persistent || !REQUEST_COMPLETE(&request->super.super)) { + ompi_request_complete(&request->super.super, true); } } OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock); @@ -407,7 +407,7 @@ request_start(size_t count, ompi_request_t ** requests) NBC_DEBUG(5, "tmpbuf address=%p size=%u\n", handle->tmpbuf, sizeof(handle->tmpbuf)); NBC_DEBUG(5, "--------------------------------\n"); - handle->super.req_complete = REQUEST_PENDING; + handle->super.super.req_complete = REQUEST_PENDING; handle->nbc_complete = false; res = NBC_Start(handle); @@ -437,7 +437,7 @@ request_free(struct ompi_request_t **ompi_req) ompi_coll_libnbc_request_t *request = (ompi_coll_libnbc_request_t*) *ompi_req; - if( !REQUEST_COMPLETE(&request->super) ) { + if( !REQUEST_COMPLETE(&request->super.super) ) { return MPI_ERR_REQUEST; } @@ -451,11 
+451,11 @@ request_free(struct ompi_request_t **ompi_req) static void request_construct(ompi_coll_libnbc_request_t *request) { - request->super.req_type = OMPI_REQUEST_COLL; - request->super.req_status._cancelled = 0; - request->super.req_start = request_start; - request->super.req_free = request_free; - request->super.req_cancel = request_cancel; + request->super.super.req_type = OMPI_REQUEST_COLL; + request->super.super.req_status._cancelled = 0; + request->super.super.req_start = request_start; + request->super.super.req_free = request_free; + request->super.super.req_cancel = request_cancel; } diff --git a/ompi/mca/coll/libnbc/nbc.c b/ompi/mca/coll/libnbc/nbc.c index 54236c25a19..171f5a37e9c 100644 --- a/ompi/mca/coll/libnbc/nbc.c +++ b/ompi/mca/coll/libnbc/nbc.c @@ -10,8 +10,8 @@ * rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* * Author(s): Torsten Hoefler * @@ -339,7 +339,7 @@ int NBC_Progress(NBC_Handle *handle) { NBC_Error ("MPI Error in NBC subrequest %p : %d", subreq, subreq->req_status.MPI_ERROR); /* copy the error code from the underlying request and let the * round finish */ - handle->super.req_status.MPI_ERROR = subreq->req_status.MPI_ERROR; + handle->super.super.req_status.MPI_ERROR = subreq->req_status.MPI_ERROR; } handle->req_count--; ompi_request_free(&subreq); @@ -365,11 +365,11 @@ int NBC_Progress(NBC_Handle *handle) { handle->req_count = 0; /* previous round had an error */ - if (OPAL_UNLIKELY(OMPI_SUCCESS != handle->super.req_status.MPI_ERROR)) { - res = handle->super.req_status.MPI_ERROR; + if (OPAL_UNLIKELY(OMPI_SUCCESS != handle->super.super.req_status.MPI_ERROR)) { + res = handle->super.super.req_status.MPI_ERROR; NBC_Error("NBC_Progress: an error %d was found during schedule %p at row-offset %li - aborting the schedule\n", res, handle->schedule, handle->row_offset); handle->nbc_complete = true; - if (!handle->super.req_persistent) { + if (!handle->super.super.req_persistent) { NBC_Free(handle); } return res; @@ -389,7 +389,7 @@ int NBC_Progress(NBC_Handle *handle) { NBC_DEBUG(5, "NBC_Progress last round finished - we're done\n"); handle->nbc_complete = true; - if (!handle->super.req_persistent) { + if (!handle->super.super.req_persistent) { NBC_Free(handle); } @@ -655,15 +655,15 @@ int NBC_Start(NBC_Handle *handle) { } /* kick off first round */ - handle->super.req_state = OMPI_REQUEST_ACTIVE; - handle->super.req_status.MPI_ERROR = OMPI_SUCCESS; + handle->super.super.req_state = OMPI_REQUEST_ACTIVE; + handle->super.super.req_status.MPI_ERROR = OMPI_SUCCESS; res = NBC_Start_round(handle); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; } OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock); - opal_list_append(&mca_coll_libnbc_component.active_requests, &(handle->super.super.super)); + opal_list_append(&mca_coll_libnbc_component.active_requests, (opal_list_item_t 
*)handle); OPAL_THREAD_UNLOCK(&mca_coll_libnbc_component.lock); return OMPI_SUCCESS; diff --git a/ompi/mpi/c/iallgather.c b/ompi/mpi/c/iallgather.c index 7d2740b6512..8e0abe3fe8c 100644 --- a/ompi/mpi/c/iallgather.c +++ b/ompi/mpi/c/iallgather.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -102,6 +103,9 @@ int MPI_Iallgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = comm->c_coll->coll_iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, comm->c_coll->coll_iallgather_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/iallgatherv.c b/ompi/mpi/c/iallgatherv.c index 0373a15b1d3..e743cb9b06f 100644 --- a/ompi/mpi/c/iallgatherv.c +++ b/ompi/mpi/c/iallgatherv.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -126,6 +127,9 @@ int MPI_Iallgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, recvbuf, recvcounts, displs, recvtype, comm, request, comm->c_coll->coll_iallgatherv_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/iallreduce.c b/ompi/mpi/c/iallreduce.c index d0ea511cf84..bfa968c55b4 100644 --- a/ompi/mpi/c/iallreduce.c +++ b/ompi/mpi/c/iallreduce.c @@ -12,8 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -31,6 +31,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -112,10 +113,11 @@ int MPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request, comm->c_coll->coll_iallreduce_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ialltoall.c b/ompi/mpi/c/ialltoall.c index 2d46b76f38f..0637f29f396 100644 --- a/ompi/mpi/c/ialltoall.c +++ b/ompi/mpi/c/ialltoall.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -101,5 +102,8 @@ int MPI_Ialltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = comm->c_coll->coll_ialltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, comm->c_coll->coll_ialltoall_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ialltoallv.c b/ompi/mpi/c/ialltoallv.c index 577b3828949..cef857cdf78 100644 --- a/ompi/mpi/c/ialltoallv.c +++ b/ompi/mpi/c/ialltoallv.c @@ -13,8 +13,8 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +30,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -130,6 +131,9 @@ int MPI_Ialltoallv(const void *sendbuf, const int sendcounts[], const int sdispl err = comm->c_coll->coll_ialltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, request, comm->c_coll->coll_ialltoallv_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ialltoallw.c b/ompi/mpi/c/ialltoallw.c index b7bc86eaa7d..6dc4af8854a 100644 --- a/ompi/mpi/c/ialltoallw.c +++ b/ompi/mpi/c/ialltoallw.c @@ -13,8 +13,8 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +30,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -127,6 +128,9 @@ int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispl sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, request, comm->c_coll->coll_ialltoallw_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes_w(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtypes, recvtypes); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ibcast.c b/ompi/mpi/c/ibcast.c index 1f049b4c6de..2dcdbb9633d 100644 --- a/ompi/mpi/c/ibcast.c +++ b/ompi/mpi/c/ibcast.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017-2018 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
@@ -19,6 +19,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -86,5 +87,13 @@ int MPI_Ibcast(void *buffer, int count, MPI_Datatype datatype, err = comm->c_coll->coll_ibcast(buffer, count, datatype, root, comm, request, comm->c_coll->coll_ibcast_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (!OMPI_COMM_IS_INTRA(comm)) { + if (MPI_PROC_NULL == root) { + datatype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, datatype, NULL); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/iexscan.c b/ompi/mpi/c/iexscan.c index 14cf23c590b..4c56e08f1e4 100644 --- a/ompi/mpi/c/iexscan.c +++ b/ompi/mpi/c/iexscan.c @@ -12,8 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +29,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -82,10 +83,11 @@ int MPI_Iexscan(const void *sendbuf, void *recvbuf, int count, /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_iexscan(sendbuf, recvbuf, count, datatype, op, comm, request, comm->c_coll->coll_iexscan_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/igather.c b/ompi/mpi/c/igather.c index 3fcda7e8069..c876daa7ec7 100644 --- a/ompi/mpi/c/igather.c +++ b/ompi/mpi/c/igather.c @@ -15,8 +15,8 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -173,5 +174,24 @@ int MPI_Igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = comm->c_coll->coll_igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request, comm->c_coll->coll_igather_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (OMPI_COMM_IS_INTRA(comm)) { + if (MPI_IN_PLACE == sendbuf) { + sendtype = NULL; + } else if (ompi_comm_rank(comm) != root) { + recvtype = NULL; + } + } else { + if (MPI_ROOT == root) { + sendtype = NULL; + } else if (MPI_PROC_NULL == root) { + sendtype = NULL; + recvtype = NULL; + } else { + recvtype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/igatherv.c b/ompi/mpi/c/igatherv.c index e2deab3cc9f..1d575dce4cc 100644 --- a/ompi/mpi/c/igatherv.c +++ b/ompi/mpi/c/igatherv.c @@ -13,7 +13,7 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science + * Copyright (c) 2015-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -29,6 +29,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -196,5 +197,24 @@ int MPI_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = comm->c_coll->coll_igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, displs, recvtype, root, comm, request, comm->c_coll->coll_igatherv_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (OMPI_COMM_IS_INTRA(comm)) { + if (MPI_IN_PLACE == sendbuf) { + sendtype = NULL; + } else if (ompi_comm_rank(comm) != root) { + recvtype = NULL; + } + } else { + if (MPI_ROOT == root) { + sendtype = NULL; + } else if (MPI_PROC_NULL == root) { + sendtype = NULL; + recvtype = NULL; + } else { + recvtype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ineighbor_allgather.c b/ompi/mpi/c/ineighbor_allgather.c index 2706ea44d4a..cba5b5d4e36 100644 --- a/ompi/mpi/c/ineighbor_allgather.c +++ b/ompi/mpi/c/ineighbor_allgather.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -32,6 +32,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -124,6 +125,9 @@ int MPI_Ineighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype sen err = comm->c_coll->coll_ineighbor_allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, comm->c_coll->coll_ineighbor_allgather_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ineighbor_allgatherv.c b/ompi/mpi/c/ineighbor_allgatherv.c index 2f3c244064c..58dedb61057 100644 --- a/ompi/mpi/c/ineighbor_allgatherv.c +++ b/ompi/mpi/c/ineighbor_allgatherv.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -32,6 +32,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -147,6 +148,9 @@ int MPI_Ineighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype se recvbuf, (int *) recvcounts, (int *) displs, recvtype, comm, request, comm->c_coll->coll_ineighbor_allgatherv_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ineighbor_alltoall.c b/ompi/mpi/c/ineighbor_alltoall.c index b3d0846421e..b03b7cc50fa 100644 --- a/ompi/mpi/c/ineighbor_alltoall.c +++ b/ompi/mpi/c/ineighbor_alltoall.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -32,6 +32,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -124,5 +125,8 @@ int MPI_Ineighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype send err = comm->c_coll->coll_ineighbor_alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request, comm->c_coll->coll_ineighbor_alltoall_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ineighbor_alltoallv.c b/ompi/mpi/c/ineighbor_alltoallv.c index 9645e15b05d..a44d081e10a 100644 --- a/ompi/mpi/c/ineighbor_alltoallv.c +++ b/ompi/mpi/c/ineighbor_alltoallv.c @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -147,6 +148,9 @@ int MPI_Ineighbor_alltoallv(const void *sendbuf, const int sendcounts[], const i err = comm->c_coll->coll_ineighbor_alltoallv(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, request, comm->c_coll->coll_ineighbor_alltoallv_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ineighbor_alltoallw.c b/ompi/mpi/c/ineighbor_alltoallw.c index 150f28d7173..efb4d24f5f7 100644 --- a/ompi/mpi/c/ineighbor_alltoallw.c +++ b/ompi/mpi/c/ineighbor_alltoallw.c @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include 
"ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -147,6 +148,9 @@ int MPI_Ineighbor_alltoallw(const void *sendbuf, const int sendcounts[], const M err = comm->c_coll->coll_ineighbor_alltoallw(sendbuf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, request, comm->c_coll->coll_ineighbor_alltoallw_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes_w(*request, sendtypes, recvtypes); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ireduce.c b/ompi/mpi/c/ireduce.c index 47948887824..be552250fce 100644 --- a/ompi/mpi/c/ireduce.c +++ b/ompi/mpi/c/ireduce.c @@ -13,8 +13,8 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -31,6 +31,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -136,10 +137,11 @@ int MPI_Ireduce(const void *sendbuf, void *recvbuf, int count, OPAL_CR_ENTER_LIBRARY(); /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_ireduce(sendbuf, recvbuf, count, datatype, op, root, comm, request, comm->c_coll->coll_ireduce_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ireduce_scatter.c b/ompi/mpi/c/ireduce_scatter.c index 211b217971e..56525fa19f7 100644 --- a/ompi/mpi/c/ireduce_scatter.c +++ b/ompi/mpi/c/ireduce_scatter.c @@ -13,8 +13,8 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -31,6 +31,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -133,10 +134,11 @@ int MPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_ireduce_scatter(sendbuf, recvbuf, recvcounts, datatype, op, comm, request, comm->c_coll->coll_ireduce_scatter_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/ireduce_scatter_block.c b/ompi/mpi/c/ireduce_scatter_block.c index ded4abf2232..ce43ab3cd4f 100644 --- a/ompi/mpi/c/ireduce_scatter_block.c +++ b/ompi/mpi/c/ireduce_scatter_block.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -100,10 +101,11 @@ int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_ireduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm, request, comm->c_coll->coll_ireduce_scatter_block_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/iscan.c b/ompi/mpi/c/iscan.c index 34502b8e366..cfae0ff409a 100644 --- a/ompi/mpi/c/iscan.c +++ b/ompi/mpi/c/iscan.c @@ -13,8 +13,8 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +30,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -96,11 +97,12 @@ int MPI_Iscan(const void *sendbuf, void *recvbuf, int count, /* Call the coll component to actually perform the allgather */ - OBJ_RETAIN(op); err = comm->c_coll->coll_iscan(sendbuf, recvbuf, count, datatype, op, comm, request, comm->c_coll->coll_iscan_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/iscatter.c b/ompi/mpi/c/iscatter.c index 79a22d57a52..3357ad21158 100644 --- a/ompi/mpi/c/iscatter.c +++ b/ompi/mpi/c/iscatter.c @@ -15,8 +15,8 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -156,5 +157,24 @@ int MPI_Iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = comm->c_coll->coll_iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request, comm->c_coll->coll_iscatter_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (OMPI_COMM_IS_INTRA(comm)) { + if (MPI_IN_PLACE == recvbuf) { + recvtype = NULL; + } else if (ompi_comm_rank(comm) != root) { + sendtype = NULL; + } + } else { + if (MPI_ROOT == root) { + recvtype = NULL; + } else if (MPI_PROC_NULL == root) { + sendtype = NULL; + recvtype = NULL; + } else { + sendtype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpi/c/iscatterv.c b/ompi/mpi/c/iscatterv.c index 66ae9003caa..2d164662f4a 100644 --- a/ompi/mpi/c/iscatterv.c +++ b/ompi/mpi/c/iscatterv.c @@ -13,8 +13,8 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +29,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -196,5 +197,24 @@ int MPI_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs[ err = comm->c_coll->coll_iscatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, request, comm->c_coll->coll_iscatterv_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (OMPI_COMM_IS_INTRA(comm)) { + if (MPI_IN_PLACE == recvbuf) { + recvtype = NULL; + } else if (ompi_comm_rank(comm) != root) { + sendtype = NULL; + } + } else { + if (MPI_ROOT == root) { + recvtype = NULL; + } else if (MPI_PROC_NULL == root) { + sendtype = NULL; + recvtype = NULL; + } else { + sendtype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/allgather_init.c b/ompi/mpiext/pcollreq/c/allgather_init.c index 46a568bc65d..4b699f91a16 100644 --- a/ompi/mpiext/pcollreq/c/allgather_init.c +++ b/ompi/mpiext/pcollreq/c/allgather_init.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -103,6 +104,9 @@ int MPIX_Allgather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtyp err = comm->c_coll->coll_allgather_init(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, comm->c_coll->coll_allgather_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/allgatherv_init.c b/ompi/mpiext/pcollreq/c/allgatherv_init.c index d4b3c7368ab..2021ab9668e 100644 --- a/ompi/mpiext/pcollreq/c/allgatherv_init.c +++ b/ompi/mpiext/pcollreq/c/allgatherv_init.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -128,6 +129,9 @@ int MPIX_Allgatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendty recvbuf, recvcounts, displs, recvtype, comm, info, request, comm->c_coll->coll_allgatherv_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/allreduce_init.c b/ompi/mpiext/pcollreq/c/allreduce_init.c index dd224f1115d..1213395f3ec 100644 --- a/ompi/mpiext/pcollreq/c/allreduce_init.c +++ b/ompi/mpiext/pcollreq/c/allreduce_init.c @@ -12,8 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ @@ -32,6 +32,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -115,9 +116,10 @@ int MPIX_Allreduce_init(const void *sendbuf, void *recvbuf, int count, /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_allreduce_init(sendbuf, recvbuf, count, datatype, op, comm, info, request, comm->c_coll->coll_allreduce_init_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/alltoall_init.c b/ompi/mpiext/pcollreq/c/alltoall_init.c index b176f63c753..7cb36216474 100644 --- a/ompi/mpiext/pcollreq/c/alltoall_init.c +++ b/ompi/mpiext/pcollreq/c/alltoall_init.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -102,5 +103,8 @@ int MPIX_Alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype err = comm->c_coll->coll_alltoall_init(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, comm->c_coll->coll_alltoall_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/alltoallv_init.c b/ompi/mpiext/pcollreq/c/alltoallv_init.c index 06d5922b2ac..3d34536fb01 100644 --- a/ompi/mpiext/pcollreq/c/alltoallv_init.c +++ b/ompi/mpiext/pcollreq/c/alltoallv_init.c @@ -13,8 +13,8 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +30,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -131,6 +132,9 @@ int MPIX_Alltoallv_init(const void *sendbuf, const int sendcounts[], const int s err = comm->c_coll->coll_alltoallv_init(sendbuf, sendcounts, sdispls, sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, info, request, comm->c_coll->coll_alltoallv_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/alltoallw_init.c b/ompi/mpiext/pcollreq/c/alltoallw_init.c index 405cc4c4f82..50902f1f639 100644 --- a/ompi/mpiext/pcollreq/c/alltoallw_init.c +++ b/ompi/mpiext/pcollreq/c/alltoallw_init.c @@ -13,8 +13,8 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +30,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -128,6 +129,9 @@ int MPIX_Alltoallw_init(const void *sendbuf, const int sendcounts[], const int s sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, info, request, comm->c_coll->coll_alltoallw_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes_w(*request, (MPI_IN_PLACE==sendbuf)?NULL:sendtypes, recvtypes); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/bcast_init.c b/ompi/mpiext/pcollreq/c/bcast_init.c index 6a2798a9700..9cf71a7a671 100644 --- a/ompi/mpiext/pcollreq/c/bcast_init.c +++ b/ompi/mpiext/pcollreq/c/bcast_init.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017-2018 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
@@ -19,6 +19,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -87,5 +88,13 @@ int MPIX_Bcast_init(void *buffer, int count, MPI_Datatype datatype, err = comm->c_coll->coll_bcast_init(buffer, count, datatype, root, comm, info, request, comm->c_coll->coll_bcast_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (!OMPI_COMM_IS_INTRA(comm)) { + if (MPI_PROC_NULL == root) { + datatype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, datatype, NULL); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/exscan_init.c b/ompi/mpiext/pcollreq/c/exscan_init.c index 23f155429cd..f8e34ced68a 100644 --- a/ompi/mpiext/pcollreq/c/exscan_init.c +++ b/ompi/mpiext/pcollreq/c/exscan_init.c @@ -12,8 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +29,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -84,10 +85,11 @@ int MPIX_Exscan_init(const void *sendbuf, void *recvbuf, int count, /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_exscan_init(sendbuf, recvbuf, count, datatype, op, comm, info, request, comm->c_coll->coll_exscan_init_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/gather_init.c b/ompi/mpiext/pcollreq/c/gather_init.c index f62dd9b54dd..051a0eaa133 100644 --- a/ompi/mpiext/pcollreq/c/gather_init.c +++ b/ompi/mpiext/pcollreq/c/gather_init.c @@ -15,8 +15,8 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -174,5 +175,24 @@ int MPIX_Gather_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = comm->c_coll->coll_gather_init(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, info, request, comm->c_coll->coll_gather_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (OMPI_COMM_IS_INTRA(comm)) { + if (MPI_IN_PLACE == sendbuf) { + sendtype = NULL; + } else if (ompi_comm_rank(comm) != root) { + recvtype = NULL; + } + } else { + if (MPI_ROOT == root) { + sendtype = NULL; + } else if (MPI_PROC_NULL == root) { + sendtype = NULL; + recvtype = NULL; + } else { + recvtype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/gatherv_init.c b/ompi/mpiext/pcollreq/c/gatherv_init.c index fbbd346008c..bd875a051c7 100644 --- a/ompi/mpiext/pcollreq/c/gatherv_init.c +++ b/ompi/mpiext/pcollreq/c/gatherv_init.c @@ -13,8 +13,8 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +29,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/memchecker.h" #include "ompi/runtime/ompi_spc.h" @@ -199,5 +200,24 @@ int MPIX_Gatherv_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, recvcounts, displs, recvtype, root, comm, info, request, comm->c_coll->coll_gatherv_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (OMPI_COMM_IS_INTRA(comm)) { + if (MPI_IN_PLACE == sendbuf) { + sendtype = NULL; + } else if (ompi_comm_rank(comm) != root) { + recvtype = NULL; + } + } else { + if (MPI_ROOT == root) { + sendtype = NULL; + } else if (MPI_PROC_NULL == root) { + sendtype = NULL; + recvtype = NULL; + } else { + recvtype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/neighbor_allgather_init.c b/ompi/mpiext/pcollreq/c/neighbor_allgather_init.c index 4494b507b72..cd3037d0bda 100644 --- a/ompi/mpiext/pcollreq/c/neighbor_allgather_init.c +++ b/ompi/mpiext/pcollreq/c/neighbor_allgather_init.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Rigde National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -32,6 +32,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -125,6 +126,9 @@ int MPIX_Neighbor_allgather_init(const void *sendbuf, int sendcount, MPI_Datatyp err = comm->c_coll->coll_neighbor_allgather_init(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, info, request, comm->c_coll->coll_neighbor_allgather_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/neighbor_allgatherv_init.c b/ompi/mpiext/pcollreq/c/neighbor_allgatherv_init.c index 66fa0487c57..3e53b846312 100644 --- a/ompi/mpiext/pcollreq/c/neighbor_allgatherv_init.c +++ b/ompi/mpiext/pcollreq/c/neighbor_allgatherv_init.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -32,6 +32,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -149,6 +150,9 @@ int MPIX_Neighbor_allgatherv_init(const void *sendbuf, int sendcount, MPI_Dataty recvbuf, (int *) recvcounts, (int *) displs, recvtype, comm, info, request, comm->c_coll->coll_neighbor_allgatherv_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/neighbor_alltoall_init.c b/ompi/mpiext/pcollreq/c/neighbor_alltoall_init.c index c564ee7e9e5..c2b0ac3c19b 100644 --- a/ompi/mpiext/pcollreq/c/neighbor_alltoall_init.c +++ b/ompi/mpiext/pcollreq/c/neighbor_alltoall_init.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -32,6 +32,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -126,5 +127,8 @@ int MPIX_Neighbor_alltoall_init(const void *sendbuf, int sendcount, MPI_Datatype recvbuf, recvcount, recvtype, comm, info, request, comm->c_coll->coll_neighbor_alltoall_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/neighbor_alltoallv_init.c b/ompi/mpiext/pcollreq/c/neighbor_alltoallv_init.c index 8d3503bf57b..f86e256d815 100644 --- a/ompi/mpiext/pcollreq/c/neighbor_alltoallv_init.c +++ b/ompi/mpiext/pcollreq/c/neighbor_alltoallv_init.c @@ -13,8 +13,8 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -149,6 +150,9 @@ int MPIX_Neighbor_alltoallv_init(const void *sendbuf, const int sendcounts[], co sendtype, recvbuf, recvcounts, rdispls, recvtype, comm, info, request, comm->c_coll->coll_neighbor_alltoallv_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/neighbor_alltoallw_init.c b/ompi/mpiext/pcollreq/c/neighbor_alltoallw_init.c index 68e2b2cad22..1143ccbb3cf 100644 --- a/ompi/mpiext/pcollreq/c/neighbor_alltoallw_init.c +++ b/ompi/mpiext/pcollreq/c/neighbor_alltoallw_init.c @@ -13,8 +13,8 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
* $COPYRIGHT$ * @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" @@ -149,6 +150,9 @@ int MPIX_Neighbor_alltoallw_init(const void *sendbuf, const int sendcounts[], co recvbuf, recvcounts, rdispls, recvtypes, comm, info, request, comm->c_coll->coll_neighbor_alltoallw_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_datatypes_w(*request, sendtypes, recvtypes); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/reduce_init.c b/ompi/mpiext/pcollreq/c/reduce_init.c index 1e72877d504..d3b50747bfe 100644 --- a/ompi/mpiext/pcollreq/c/reduce_init.c +++ b/ompi/mpiext/pcollreq/c/reduce_init.c @@ -13,8 +13,8 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ @@ -32,6 +32,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -139,10 +140,11 @@ int MPIX_Reduce_init(const void *sendbuf, void *recvbuf, int count, OPAL_CR_ENTER_LIBRARY(); /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_reduce_init(sendbuf, recvbuf, count, datatype, op, root, comm, info, request, comm->c_coll->coll_reduce_init_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/reduce_scatter_block_init.c b/ompi/mpiext/pcollreq/c/reduce_scatter_block_init.c index ef000ae6e16..c0b8c344e62 100644 --- a/ompi/mpiext/pcollreq/c/reduce_scatter_block_init.c +++ b/ompi/mpiext/pcollreq/c/reduce_scatter_block_init.c @@ -14,8 +14,8 @@ * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -101,10 +102,11 @@ int MPIX_Reduce_scatter_block_init(const void *sendbuf, void *recvbuf, int recvc /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_reduce_scatter_block_init(sendbuf, recvbuf, recvcount, datatype, op, comm, info, request, comm->c_coll->coll_reduce_scatter_block_init_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/reduce_scatter_init.c b/ompi/mpiext/pcollreq/c/reduce_scatter_init.c index b8c470f064d..5bf5712e3e6 100644 --- a/ompi/mpiext/pcollreq/c/reduce_scatter_init.c +++ b/ompi/mpiext/pcollreq/c/reduce_scatter_init.c @@ -13,8 +13,8 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ @@ -32,6 +32,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -135,10 +136,11 @@ int MPIX_Reduce_scatter_init(const void *sendbuf, void *recvbuf, const int recvc /* Invoke the coll component to perform the back-end operation */ - OBJ_RETAIN(op); err = comm->c_coll->coll_reduce_scatter_init(sendbuf, recvbuf, recvcounts, datatype, op, comm, info, request, comm->c_coll->coll_reduce_scatter_init_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/scan_init.c b/ompi/mpiext/pcollreq/c/scan_init.c index 8ff34dd5f5f..35540c1a102 100644 --- a/ompi/mpiext/pcollreq/c/scan_init.c +++ b/ompi/mpiext/pcollreq/c/scan_init.c @@ -13,8 +13,8 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,6 +30,7 @@ #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/op/op.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -98,11 +99,12 @@ int MPIX_Scan_init(const void *sendbuf, void *recvbuf, int count, /* Call the coll component to actually perform the allgather */ - OBJ_RETAIN(op); err = comm->c_coll->coll_scan_init(sendbuf, recvbuf, count, datatype, op, comm, info, request, comm->c_coll->coll_scan_init_module); - OBJ_RELEASE(op); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + ompi_coll_base_retain_op(*request, op, datatype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/scatter_init.c b/ompi/mpiext/pcollreq/c/scatter_init.c index 30ee31f88d4..7ab7700c62a 100644 --- a/ompi/mpiext/pcollreq/c/scatter_init.c +++ b/ompi/mpiext/pcollreq/c/scatter_init.c @@ -15,8 +15,8 @@ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -31,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -157,5 +158,24 @@ int MPIX_Scatter_init(const void *sendbuf, int sendcount, MPI_Datatype sendtype, err = comm->c_coll->coll_scatter_init(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, info, request, comm->c_coll->coll_scatter_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (OMPI_COMM_IS_INTRA(comm)) { + if (MPI_IN_PLACE == recvbuf) { + recvtype = NULL; + } else if (ompi_comm_rank(comm) != root) { + sendtype = NULL; + } + } else { + if (MPI_ROOT == root) { + recvtype = NULL; + } else if (MPI_PROC_NULL == root) { + sendtype = NULL; + recvtype = NULL; + } else { + sendtype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } diff --git a/ompi/mpiext/pcollreq/c/scatterv_init.c b/ompi/mpiext/pcollreq/c/scatterv_init.c index fef368caf7b..d2d53c7fd95 100644 --- a/ompi/mpiext/pcollreq/c/scatterv_init.c +++ b/ompi/mpiext/pcollreq/c/scatterv_init.c @@ -13,8 +13,8 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +29,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/coll/base/coll_base_util.h" #include "ompi/memchecker.h" #include "ompi/mpiext/pcollreq/c/mpiext_pcollreq_c.h" #include "ompi/runtime/ompi_spc.h" @@ -197,5 +198,24 @@ int MPIX_Scatterv_init(const void *sendbuf, const int sendcounts[], const int di err = comm->c_coll->coll_scatterv_init(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, info, request, comm->c_coll->coll_scatterv_init_module); + if (OPAL_LIKELY(OMPI_SUCCESS == err)) { + if (OMPI_COMM_IS_INTRA(comm)) { + if (MPI_IN_PLACE == recvbuf) { + recvtype = NULL; + } else if (ompi_comm_rank(comm) != root) { + sendtype = NULL; + } + } else { + if (MPI_ROOT == root) { + recvtype = NULL; + } else if (MPI_PROC_NULL == root) { + sendtype = NULL; + recvtype = NULL; + } else { + sendtype = NULL; + } + } + ompi_coll_base_retain_datatypes(*request, sendtype, recvtype); + } OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME); } From 71f240f078b9dcb177b15b3b2ee8777d039801b9 Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Fri, 12 Jul 2019 07:35:56 -0600 Subject: [PATCH 36/94] btl/openib: fix issue 6785 Commit d7053a3 broke things for the case when Open MPI 4.0.x is built without UCX support. Problem was it was trying to partially initialize the btl to try and delay printing of a help message till wireup. Well this sort of doesn't work in all cases. Rather than keep piling on changes to support a help message for a BTL that we are deprecating, take a keep it simple stupid approach. So, revert most of d7053a3 and instead put the help message back in the original location, during scan of ports of the available HCAs to check for whether or not link layer for that port is configured for ethernet or infiniband. 
If Open MPI was built with UCX support, don't emit the help message; if UCX was not linked in, emit the help message. Verified on a system with ConnectX-5 HCAs that have two ports configured for Ethernet and two for InfiniBand. relates to #6785 Signed-off-by: Howard Pritchard --- config/ompi_check_ucx.m4 | 2 + opal/mca/btl/openib/btl_openib.c | 137 ++++++++++----------- opal/mca/btl/openib/btl_openib.h | 4 +- opal/mca/btl/openib/btl_openib_component.c | 91 ++++++-------- 4 files changed, 103 insertions(+), 131 deletions(-) diff --git a/config/ompi_check_ucx.m4 b/config/ompi_check_ucx.m4 index 044b599dc3b..42e53f9ce80 100644 --- a/config/ompi_check_ucx.m4 +++ b/config/ompi_check_ucx.m4 @@ -135,9 +135,11 @@ AC_DEFUN([OMPI_CHECK_UCX],[ [$1_CPPFLAGS="[$]$1_CPPFLAGS $ompi_check_ucx_CPPFLAGS" $1_LDFLAGS="[$]$1_LDFLAGS $ompi_check_ucx_LDFLAGS" $1_LIBS="[$]$1_LIBS $ompi_check_ucx_LIBS" + AC_DEFINE([HAVE_UCX], [1], [have ucx]) $2], [AS_IF([test ! -z "$with_ucx" && test "$with_ucx" != "no"], [AC_MSG_ERROR([UCX support requested but not found. Aborting])]) + AC_DEFINE([HAVE_UCX], [0], [have ucx]) $3]) OPAL_VAR_SCOPE_POP diff --git a/opal/mca/btl/openib/btl_openib.c b/opal/mca/btl/openib/btl_openib.c index c2686a0676a..f9ba3a3de61 100644 --- a/opal/mca/btl/openib/btl_openib.c +++ b/opal/mca/btl/openib/btl_openib.c @@ -22,6 +22,7 @@ * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved + * Copyrigth (c) 2019 Triad National Security, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -1040,15 +1041,6 @@ int mca_btl_openib_add_procs( int btl_rank = 0; volatile mca_btl_base_endpoint_t* endpoint; - - if (! 
openib_btl->allowed) { - opal_bitmap_clear_all_bits(reachable); - opal_show_help("help-mpi-btl-openib.txt", "ib port not selected", - true, opal_process_info.nodename, - openib_btl->device_name, openib_btl->port_num); - return OPAL_SUCCESS; - } - btl_rank = get_openib_btl_params(openib_btl, &lcl_subnet_id_port_cnt); if( 0 > btl_rank ){ return OPAL_ERR_NOT_FOUND; @@ -1648,81 +1640,80 @@ static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl) return OPAL_SUCCESS; } - if (openib_btl->allowed) { - /* Release all QPs */ - if (NULL != openib_btl->device->endpoints) { - for (ep_index=0; - ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints); - ep_index++) { - endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints, + /* Release all QPs */ + if (NULL != openib_btl->device->endpoints) { + for (ep_index=0; + ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints); + ep_index++) { + + endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints, ep_index); - if(!endpoint) { - BTL_VERBOSE(("In finalize, got another null endpoint")); - continue; - } - if(endpoint->endpoint_btl != openib_btl) { - continue; - } - for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) { - if(openib_btl->device->eager_rdma_buffers[i] == endpoint) { - openib_btl->device->eager_rdma_buffers[i] = NULL; - OBJ_RELEASE(endpoint); - } + if(!endpoint) { + BTL_VERBOSE(("In finalize, got another null endpoint")); + continue; + } + if(endpoint->endpoint_btl != openib_btl) { + continue; + } + for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) { + if(openib_btl->device->eager_rdma_buffers[i] == endpoint) { + openib_btl->device->eager_rdma_buffers[i] = NULL; + OBJ_RELEASE(endpoint); } - opal_pointer_array_set_item(openib_btl->device->endpoints, - ep_index, NULL); - assert(((opal_object_t*)endpoint)->obj_reference_count == 1); - OBJ_RELEASE(endpoint); } + 
opal_pointer_array_set_item(openib_btl->device->endpoints, + ep_index, NULL); + assert(((opal_object_t*)endpoint)->obj_reference_count == 1); + OBJ_RELEASE(endpoint); } + } - /* Release SRQ resources */ - for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) { - if(!BTL_OPENIB_QP_TYPE_PP(qp)) { - MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS( - &openib_btl->qps[qp].u.srq_qp.pending_frags[0]); - MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS( - &openib_btl->qps[qp].u.srq_qp.pending_frags[1]); - if (NULL != openib_btl->qps[qp].u.srq_qp.srq) { - opal_mutex_t *lock = - &mca_btl_openib_component.srq_manager.lock; - - opal_hash_table_t *srq_addr_table = - &mca_btl_openib_component.srq_manager.srq_addr_table; - - opal_mutex_lock(lock); - if (OPAL_SUCCESS != - opal_hash_table_remove_value_ptr(srq_addr_table, - &openib_btl->qps[qp].u.srq_qp.srq, - sizeof(struct ibv_srq *))) { - BTL_VERBOSE(("Failed to remove SRQ %d entry from hash table.", qp)); - rc = OPAL_ERROR; - } - opal_mutex_unlock(lock); - if (0 != ibv_destroy_srq(openib_btl->qps[qp].u.srq_qp.srq)) { - BTL_VERBOSE(("Failed to close SRQ %d", qp)); - rc = OPAL_ERROR; - } - } + /* Release SRQ resources */ + for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) { + if(!BTL_OPENIB_QP_TYPE_PP(qp)) { + MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS( + &openib_btl->qps[qp].u.srq_qp.pending_frags[0]); + MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS( + &openib_btl->qps[qp].u.srq_qp.pending_frags[1]); + if (NULL != openib_btl->qps[qp].u.srq_qp.srq) { + opal_mutex_t *lock = + &mca_btl_openib_component.srq_manager.lock; - OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[0]); - OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[1]); - } - } + opal_hash_table_t *srq_addr_table = + &mca_btl_openib_component.srq_manager.srq_addr_table; - /* Finalize the CPC modules on this openib module */ - for (i = 0; i < openib_btl->num_cpcs; ++i) { - if (NULL != openib_btl->cpcs[i]->cbm_finalize) { - openib_btl->cpcs[i]->cbm_finalize(openib_btl, openib_btl->cpcs[i]); + 
opal_mutex_lock(lock); + if (OPAL_SUCCESS != + opal_hash_table_remove_value_ptr(srq_addr_table, + &openib_btl->qps[qp].u.srq_qp.srq, + sizeof(struct ibv_srq *))) { + BTL_VERBOSE(("Failed to remove SRQ %d entry from hash table.", qp)); + rc = OPAL_ERROR; + } + opal_mutex_unlock(lock); + if (0 != ibv_destroy_srq(openib_btl->qps[qp].u.srq_qp.srq)) { + BTL_VERBOSE(("Failed to close SRQ %d", qp)); + rc = OPAL_ERROR; + } } - free(openib_btl->cpcs[i]); + + OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[0]); + OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[1]); } - free(openib_btl->cpcs); + } - /* Release device if there are no more users */ - if(!(--openib_btl->device->allowed_btls)) { - OBJ_RELEASE(openib_btl->device); + /* Finalize the CPC modules on this openib module */ + for (i = 0; i < openib_btl->num_cpcs; ++i) { + if (NULL != openib_btl->cpcs[i]->cbm_finalize) { + openib_btl->cpcs[i]->cbm_finalize(openib_btl, openib_btl->cpcs[i]); } + free(openib_btl->cpcs[i]); + } + free(openib_btl->cpcs); + + /* Release device if there are no more users */ + if(!(--openib_btl->device->allowed_btls)) { + OBJ_RELEASE(openib_btl->device); } if (NULL != openib_btl->qps) { diff --git a/opal/mca/btl/openib/btl_openib.h b/opal/mca/btl/openib/btl_openib.h index 0b85bfb5662..3ffc0feffce 100644 --- a/opal/mca/btl/openib/btl_openib.h +++ b/opal/mca/btl/openib/btl_openib.h @@ -20,6 +20,8 @@ * Copyright (c) 2014 Bull SAS. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyrigth (c) 2019 Triad National Security, LLC. All rights reserved. 
+ * * $COPYRIGHT$ * * Additional copyrights may follow @@ -506,8 +508,6 @@ struct mca_btl_openib_module_t { int local_procs; /** number of local procs */ bool atomic_ops_be; /** atomic result is big endian */ - - bool allowed; /** is this port allowed */ }; typedef struct mca_btl_openib_module_t mca_btl_openib_module_t; diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c index fcc0ac56973..d93178fb537 100644 --- a/opal/mca/btl/openib/btl_openib_component.c +++ b/opal/mca/btl/openib/btl_openib_component.c @@ -22,6 +22,7 @@ * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. + * Copyrigth (c) 2019 Triad National Security, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -278,9 +279,6 @@ static int btl_openib_modex_send(void) ); /* For each module, add in the size of the per-CPC data */ for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) { - if (! mca_btl_openib_component.openib_btls[i]->allowed) { - continue; - } for (j = 0; j < mca_btl_openib_component.openib_btls[i]->num_cpcs; ++j) { @@ -309,9 +307,6 @@ static int btl_openib_modex_send(void) /* Pack each of the modules */ for (i = 0; i < mca_btl_openib_component.ib_num_btls; i++) { - if (! mca_btl_openib_component.openib_btls[i]->allowed) { - continue; - } /* Pack the modex common message struct. */ size = modex_message_size; @@ -633,38 +628,26 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, * unless the user specifically requested to override this * policy. For ancient OFED, only allow if user has set * the MCA parameter. + * + * We emit a help message if Open MPI was configured without + * UCX support if the port is configured to use infiniband for link + * layer. 
If UCX support is available, don't emit help message + * since UCX PML has higher priority than OB1 and this BTL will + * not be used. */ - if (! mca_btl_openib_component.allow_ib + if (false == mca_btl_openib_component.allow_ib #if HAVE_DECL_IBV_LINK_LAYER_ETHERNET && IBV_LINK_LAYER_INFINIBAND == ib_port_attr->link_layer #endif ) { - openib_btl = (mca_btl_openib_module_t *) calloc(1, sizeof(mca_btl_openib_module_t)); - if(NULL == openib_btl) { - BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__)); - return OPAL_ERR_OUT_OF_RESOURCE; - } - memcpy(openib_btl, &mca_btl_openib_module, - sizeof(mca_btl_openib_module)); - ib_selected = OBJ_NEW(mca_btl_base_selected_module_t); - ib_selected->btl_module = (mca_btl_base_module_t*) openib_btl; - openib_btl->port_num = (uint8_t) port_num; - openib_btl->allowed = false; - openib_btl->device = NULL; - openib_btl->device_name = strdup(ibv_get_device_name(device->ib_dev)); - OBJ_CONSTRUCT(&openib_btl->ib_lock, opal_mutex_t); - opal_list_append(btl_list, (opal_list_item_t*) ib_selected); - opal_pointer_array_add(device->device_btls, (void*) openib_btl); - ++device->btls; - ++mca_btl_openib_component.ib_num_btls; - if (-1 != mca_btl_openib_component.ib_max_btls && - mca_btl_openib_component.ib_num_btls >= - mca_btl_openib_component.ib_max_btls) { - return OPAL_ERR_VALUE_OUT_OF_BOUNDS; - } - return OPAL_SUCCESS; - } - +#if !HAVE_UCX + opal_show_help("help-mpi-btl-openib.txt", "ib port not selected", + true, opal_process_info.nodename, + ibv_get_device_name(device->ib_dev), + port_num); +#endif + return OPAL_ERR_NOT_FOUND; + } /* Ensure that the requested GID index (via the btl_openib_gid_index MCA param) is within the GID table @@ -901,8 +884,6 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device, } } - openib_btl->allowed = true; - opal_list_append(btl_list, (opal_list_item_t*) ib_selected); opal_pointer_array_add(device->device_btls, (void*) openib_btl); ++device->btls; @@ -2999,29 +2980,27 @@ 
btl_openib_component_init(int *num_btl_modules, ib_selected = (mca_btl_base_selected_module_t*)item; openib_btl = (mca_btl_openib_module_t*)ib_selected->btl_module; - if (openib_btl->allowed) { - /* Search for a CPC that can handle this port */ - ret = opal_btl_openib_connect_base_select_for_local_port(openib_btl); - /* If we get NOT_SUPPORTED, then no CPC was found for this - port. But that's not a fatal error -- just keep going; - let's see if we find any usable openib modules or not. */ - if (OPAL_ERR_NOT_SUPPORTED == ret) { - continue; - } else if (OPAL_SUCCESS != ret) { - /* All others *are* fatal. Note that we already did a - show_help in the lower layer */ - goto no_btls; - } + /* Search for a CPC that can handle this port */ + ret = opal_btl_openib_connect_base_select_for_local_port(openib_btl); + /* If we get NOT_SUPPORTED, then no CPC was found for this + port. But that's not a fatal error -- just keep going; + let's see if we find any usable openib modules or not. */ + if (OPAL_ERR_NOT_SUPPORTED == ret) { + continue; + } else if (OPAL_SUCCESS != ret) { + /* All others *are* fatal. 
Note that we already did a + show_help in the lower layer */ + goto no_btls; + } - if (mca_btl_openib_component.max_hw_msg_size > 0 && - (uint32_t)mca_btl_openib_component.max_hw_msg_size > openib_btl->ib_port_attr.max_msg_sz) { - BTL_ERROR(("max_hw_msg_size (%" PRIu32 ") is larger than hw max message size (%" PRIu32 ")", - mca_btl_openib_component.max_hw_msg_size, openib_btl->ib_port_attr.max_msg_sz)); - } + if (mca_btl_openib_component.max_hw_msg_size > 0 && + (uint32_t)mca_btl_openib_component.max_hw_msg_size > openib_btl->ib_port_attr.max_msg_sz) { + BTL_ERROR(("max_hw_msg_size (%" PRIu32 ") is larger than hw max message size (%" PRIu32 ")", + mca_btl_openib_component.max_hw_msg_size, openib_btl->ib_port_attr.max_msg_sz)); + } - if (finish_btl_init(openib_btl) != OPAL_SUCCESS) { - goto no_btls; - } + if (finish_btl_init(openib_btl) != OPAL_SUCCESS) { + goto no_btls; } mca_btl_openib_component.openib_btls[i] = openib_btl; From 63605fc4669902a2b83bf0777898e461cf5b45a3 Mon Sep 17 00:00:00 2001 From: Tomislav Janjusic Date: Fri, 12 Jul 2019 22:23:33 +0300 Subject: [PATCH 37/94] v4.0.x OSC: Reset external request to NULL to avoid double request completion Co-authored with Artem Polyakov Signed-off-by: Tomislav Janjusic --- ompi/mca/osc/ucx/osc_ucx_request.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ompi/mca/osc/ucx/osc_ucx_request.c b/ompi/mca/osc/ucx/osc_ucx_request.c index efbd9c38cc6..4be050e3dcc 100644 --- a/ompi/mca/osc/ucx/osc_ucx_request.c +++ b/ompi/mca/osc/ucx/osc_ucx_request.c @@ -55,6 +55,7 @@ void req_completion(void *request, ucs_status_t status) { if(req->external_req != NULL) { ompi_request_complete(&(req->external_req->super), true); + req->external_req = NULL; ucp_request_release(req); mca_osc_ucx_component.num_incomplete_req_ops--; assert(mca_osc_ucx_component.num_incomplete_req_ops >= 0); From aae73d9cf7517693cfc7b9280f1687d30178b4d9 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Sat, 13 Jul 2019 18:36:12 +0900 Subject: [PATCH 
38/94] fortran/mpif-h: fix C to Fortran error code conversion - remove incorrect use of OMPI_INT_2_FINT() - use homogeneous syntax (e.g. c_ierr = PMPI_...()) Signed-off-by: Gilles Gouaillardet (cherry picked from commit open-mpi/ompi@223e6cc5377f968d135a44f0470a6de38740315c) --- ompi/mpi/fortran/mpif-h/comm_split_type_f.c | 14 +++++----- .../mpif-h/dist_graph_create_adjacent_f.c | 26 ++++++++++--------- ompi/mpi/fortran/mpif-h/dist_graph_create_f.c | 14 +++++----- .../mpif-h/dist_graph_neighbors_count_f.c | 14 +++++----- .../fortran/mpif-h/dist_graph_neighbors_f.c | 19 ++++++++------ ompi/mpi/fortran/mpif-h/improbe_f.c | 12 ++++----- ompi/mpi/fortran/mpif-h/imrecv_f.c | 8 +++--- ompi/mpi/fortran/mpif-h/mprobe_f.c | 10 +++---- ompi/mpi/fortran/mpif-h/mrecv_f.c | 8 +++--- 9 files changed, 67 insertions(+), 58 deletions(-) diff --git a/ompi/mpi/fortran/mpif-h/comm_split_type_f.c b/ompi/mpi/fortran/mpif-h/comm_split_type_f.c index c6eb7306a37..b35d45ac06f 100644 --- a/ompi/mpi/fortran/mpif-h/comm_split_type_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_split_type_f.c @@ -11,8 +11,8 @@ * All rights reserved. * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -77,11 +77,11 @@ void ompi_comm_split_type_f(MPI_Fint *comm, MPI_Fint *split_type, MPI_Fint *key, c_info = PMPI_Info_f2c(*info); - c_ierr = OMPI_INT_2_FINT(PMPI_Comm_split_type(c_comm, - OMPI_FINT_2_INT(*split_type), - OMPI_FINT_2_INT(*key), - c_info, - &c_newcomm )); + c_ierr = PMPI_Comm_split_type(c_comm, + OMPI_FINT_2_INT(*split_type), + OMPI_FINT_2_INT(*key), + c_info, + &c_newcomm); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/dist_graph_create_adjacent_f.c b/ompi/mpi/fortran/mpif-h/dist_graph_create_adjacent_f.c index f9668b379a8..1f2e6bc795a 100644 --- a/ompi/mpi/fortran/mpif-h/dist_graph_create_adjacent_f.c +++ b/ompi/mpi/fortran/mpif-h/dist_graph_create_adjacent_f.c @@ -8,8 +8,8 @@ * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -78,6 +78,7 @@ void ompi_dist_graph_create_adjacent_f(MPI_Fint *comm_old, MPI_Fint *indegree, MPI_Info c_info; MPI_Comm c_comm_old, c_comm_graph; int *c_destweights, *c_sourceweights; + int c_ierr; OMPI_ARRAY_NAME_DECL(sources); OMPI_ARRAY_NAME_DECL(destinations); @@ -105,16 +106,17 @@ void ompi_dist_graph_create_adjacent_f(MPI_Fint *comm_old, MPI_Fint *indegree, c_destweights = OMPI_ARRAY_NAME_CONVERT(destweights); } - *ierr = OMPI_INT_2_FINT(PMPI_Dist_graph_create_adjacent(c_comm_old, OMPI_FINT_2_INT(*indegree), - OMPI_ARRAY_NAME_CONVERT(sources), - c_sourceweights, - OMPI_FINT_2_INT(*outdegree), - OMPI_ARRAY_NAME_CONVERT(destinations), - c_destweights, - c_info, - OMPI_LOGICAL_2_INT(*reorder), - &c_comm_graph)); - if (OMPI_SUCCESS == OMPI_FINT_2_INT(*ierr)) { + c_ierr = PMPI_Dist_graph_create_adjacent(c_comm_old, OMPI_FINT_2_INT(*indegree), + OMPI_ARRAY_NAME_CONVERT(sources), + c_sourceweights, + OMPI_FINT_2_INT(*outdegree), + OMPI_ARRAY_NAME_CONVERT(destinations), + c_destweights, + c_info, + OMPI_LOGICAL_2_INT(*reorder), + &c_comm_graph); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + if (OMPI_SUCCESS == c_ierr) { *comm_graph = PMPI_Comm_c2f(c_comm_graph); } diff --git a/ompi/mpi/fortran/mpif-h/dist_graph_create_f.c b/ompi/mpi/fortran/mpif-h/dist_graph_create_f.c index 2692f9b7d06..3380be8a537 100644 --- a/ompi/mpi/fortran/mpif-h/dist_graph_create_f.c +++ b/ompi/mpi/fortran/mpif-h/dist_graph_create_f.c @@ -7,8 +7,8 @@ * Copyright (c) 2011-2013 Université Bordeaux 1 * Copyright (c) 2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -75,6 +75,7 @@ void ompi_dist_graph_create_f(MPI_Fint *comm_old, MPI_Fint *n, MPI_Fint *sources int count = 0, i; MPI_Info c_info; int *c_weights; + int c_ierr; OMPI_ARRAY_NAME_DECL(sources); OMPI_ARRAY_NAME_DECL(degrees); @@ -98,10 +99,11 @@ void ompi_dist_graph_create_f(MPI_Fint *comm_old, MPI_Fint *n, MPI_Fint *sources } - *ierr = OMPI_INT_2_FINT(PMPI_Dist_graph_create(c_comm_old, OMPI_FINT_2_INT(*n), OMPI_ARRAY_NAME_CONVERT(sources), - OMPI_ARRAY_NAME_CONVERT(degrees), OMPI_ARRAY_NAME_CONVERT(destinations), - c_weights, c_info, OMPI_LOGICAL_2_INT(*reorder), &c_comm_graph)); - if (OMPI_SUCCESS == OMPI_FINT_2_INT(*ierr)) { + c_ierr = PMPI_Dist_graph_create(c_comm_old, OMPI_FINT_2_INT(*n), OMPI_ARRAY_NAME_CONVERT(sources), + OMPI_ARRAY_NAME_CONVERT(degrees), OMPI_ARRAY_NAME_CONVERT(destinations), + c_weights, c_info, OMPI_LOGICAL_2_INT(*reorder), &c_comm_graph); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + if (OMPI_SUCCESS == c_ierr) { *comm_graph = PMPI_Comm_c2f(c_comm_graph); } diff --git a/ompi/mpi/fortran/mpif-h/dist_graph_neighbors_count_f.c b/ompi/mpi/fortran/mpif-h/dist_graph_neighbors_count_f.c index 4f8611e783a..aad5aac5f80 100644 --- a/ompi/mpi/fortran/mpif-h/dist_graph_neighbors_count_f.c +++ b/ompi/mpi/fortran/mpif-h/dist_graph_neighbors_count_f.c @@ -4,7 +4,7 @@ * reserved. * Copyright (c) 2011-2013 Inria. All rights reserved. * Copyright (c) 2011-2013 Université Bordeaux 1 - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * @@ -69,15 +69,17 @@ void ompi_dist_graph_neighbors_count_f(MPI_Fint *comm, MPI_Fint *inneighbors, OMPI_SINGLE_NAME_DECL(inneighbors); OMPI_SINGLE_NAME_DECL(outneighbors); OMPI_LOGICAL_NAME_DECL(weighted); + int c_ierr; c_comm = PMPI_Comm_f2c(*comm); - *ierr = OMPI_INT_2_FINT(PMPI_Dist_graph_neighbors_count(c_comm, - OMPI_SINGLE_NAME_CONVERT(inneighbors), - OMPI_SINGLE_NAME_CONVERT(outneighbors), - OMPI_LOGICAL_SINGLE_NAME_CONVERT(weighted))); + c_ierr = PMPI_Dist_graph_neighbors_count(c_comm, + OMPI_SINGLE_NAME_CONVERT(inneighbors), + OMPI_SINGLE_NAME_CONVERT(outneighbors), + OMPI_LOGICAL_SINGLE_NAME_CONVERT(weighted)); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); OMPI_SINGLE_INT_2_LOGICAL(weighted); - if (OMPI_SUCCESS == OMPI_FINT_2_INT(*ierr)) { + if (OMPI_SUCCESS == c_ierr) { OMPI_SINGLE_INT_2_FINT(inneighbors); OMPI_SINGLE_INT_2_FINT(outneighbors); } diff --git a/ompi/mpi/fortran/mpif-h/dist_graph_neighbors_f.c b/ompi/mpi/fortran/mpif-h/dist_graph_neighbors_f.c index 5309b322c35..556d909ad1d 100644 --- a/ompi/mpi/fortran/mpif-h/dist_graph_neighbors_f.c +++ b/ompi/mpi/fortran/mpif-h/dist_graph_neighbors_f.c @@ -4,8 +4,8 @@ * reserved. * Copyright (c) 2011-2013 Inria. All rights reserved. * Copyright (c) 2011-2013 Université Bordeaux 1 - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -74,6 +74,7 @@ void ompi_dist_graph_neighbors_f(MPI_Fint* comm, MPI_Fint* maxindegree, OMPI_ARRAY_NAME_DECL(sourceweights); OMPI_ARRAY_NAME_DECL(destinations); OMPI_ARRAY_NAME_DECL(destweights); + int c_ierr; c_comm = PMPI_Comm_f2c(*comm); @@ -86,12 +87,14 @@ void ompi_dist_graph_neighbors_f(MPI_Fint* comm, MPI_Fint* maxindegree, OMPI_ARRAY_FINT_2_INT_ALLOC(destweights, *maxoutdegree); } - *ierr = OMPI_INT_2_FINT(PMPI_Dist_graph_neighbors(c_comm, OMPI_FINT_2_INT(*maxindegree), - OMPI_ARRAY_NAME_CONVERT(sources), - OMPI_IS_FORTRAN_UNWEIGHTED(sourceweights) ? MPI_UNWEIGHTED : OMPI_ARRAY_NAME_CONVERT(sourceweights), - OMPI_FINT_2_INT(*maxoutdegree), OMPI_ARRAY_NAME_CONVERT(destinations), - OMPI_IS_FORTRAN_UNWEIGHTED(destweights) ? MPI_UNWEIGHTED : OMPI_ARRAY_NAME_CONVERT(destweights))); - if (OMPI_SUCCESS == OMPI_FINT_2_INT(*ierr)) { + c_ierr = PMPI_Dist_graph_neighbors(c_comm, OMPI_FINT_2_INT(*maxindegree), + OMPI_ARRAY_NAME_CONVERT(sources), + OMPI_IS_FORTRAN_UNWEIGHTED(sourceweights) ? MPI_UNWEIGHTED : OMPI_ARRAY_NAME_CONVERT(sourceweights), + OMPI_FINT_2_INT(*maxoutdegree), OMPI_ARRAY_NAME_CONVERT(destinations), + OMPI_IS_FORTRAN_UNWEIGHTED(destweights) ? MPI_UNWEIGHTED : OMPI_ARRAY_NAME_CONVERT(destweights)); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (OMPI_SUCCESS == c_ierr) { OMPI_ARRAY_INT_2_FINT(sources, *maxindegree); if( !OMPI_IS_FORTRAN_UNWEIGHTED(sourceweights) ) { OMPI_ARRAY_INT_2_FINT(sourceweights, *maxindegree); diff --git a/ompi/mpi/fortran/mpif-h/improbe_f.c b/ompi/mpi/fortran/mpif-h/improbe_f.c index 8d7764fffd1..936cc4e399c 100644 --- a/ompi/mpi/fortran/mpif-h/improbe_f.c +++ b/ompi/mpi/fortran/mpif-h/improbe_f.c @@ -11,8 +11,8 @@ * All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. 
- * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * @@ -87,10 +87,10 @@ void ompi_improbe_f(MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = OMPI_INT_2_FINT(PMPI_Improbe(OMPI_FINT_2_INT(*source), - OMPI_FINT_2_INT(*tag), - c_comm, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag), - &c_message, c_status)); + c_ierr = PMPI_Improbe(OMPI_FINT_2_INT(*source), + OMPI_FINT_2_INT(*tag), + c_comm, OMPI_LOGICAL_SINGLE_NAME_CONVERT(flag), + &c_message, c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/imrecv_f.c b/ompi/mpi/fortran/mpif-h/imrecv_f.c index 4ba7a13a2ad..2f706d941c1 100644 --- a/ompi/mpi/fortran/mpif-h/imrecv_f.c +++ b/ompi/mpi/fortran/mpif-h/imrecv_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2015 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ * @@ -80,8 +80,8 @@ void ompi_imrecv_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, c_message = PMPI_Message_f2c(*message); - c_ierr = OMPI_INT_2_FINT(PMPI_Imrecv(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), - c_type, &c_message, &c_req)); + c_ierr = PMPI_Imrecv(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_type, &c_message, &c_req); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/mprobe_f.c b/ompi/mpi/fortran/mpif-h/mprobe_f.c index db39bce941c..0558e9543f8 100644 --- a/ompi/mpi/fortran/mpif-h/mprobe_f.c +++ b/ompi/mpi/fortran/mpif-h/mprobe_f.c @@ -12,7 +12,7 @@ * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ @@ -86,10 +86,10 @@ void ompi_mprobe_f(MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) - c_ierr = OMPI_INT_2_FINT(PMPI_Mprobe(OMPI_FINT_2_INT(*source), - OMPI_FINT_2_INT(*tag), - c_comm, &c_message, - c_status)); + c_ierr = PMPI_Mprobe(OMPI_FINT_2_INT(*source), + OMPI_FINT_2_INT(*tag), + c_comm, &c_message, + c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { diff --git a/ompi/mpi/fortran/mpif-h/mrecv_f.c b/ompi/mpi/fortran/mpif-h/mrecv_f.c index 33a122510cd..8a898ce7f5d 100644 --- a/ompi/mpi/fortran/mpif-h/mrecv_f.c +++ b/ompi/mpi/fortran/mpif-h/mrecv_f.c @@ -77,16 +77,16 @@ void ompi_mrecv_f(char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *message, MPI_Fint *status, MPI_Fint *ierr) { int c_ierr; - OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) + OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2) MPI_Message c_message = PMPI_Message_f2c(*message); MPI_Datatype c_type = PMPI_Type_f2c(*datatype); OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status) /* Call the C function */ - c_ierr = OMPI_INT_2_FINT(PMPI_Mrecv(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), - c_type, &c_message, - c_status)); + c_ierr = PMPI_Mrecv(OMPI_F2C_BOTTOM(buf), OMPI_FINT_2_INT(*count), + c_type, &c_message, + c_status); if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); if (MPI_SUCCESS == c_ierr) { From fbf7d31fd18e09ee5acfebcb03414920ada0cc17 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Sat, 13 Jul 2019 22:08:11 +0900 Subject: [PATCH 39/94] fortran/mpif-h: fix MPI_[I]Alltoallw() binding - ignore sendcounts, sendispls and sendtypes arguments when MPI_IN_PLACE is used - use the right size when an inter-communicator is used. Thanks Markus Geimer for reporting this. Refs. 
open-mpi/ompi#5459 Signed-off-by: Gilles Gouaillardet (cherry picked from commit open-mpi/ompi@cdaed89d0481e8352a6c1a6cb8f71a250ea9352b) --- ompi/mpi/fortran/mpif-h/alltoallw_f.c | 29 +++++++++++++---------- ompi/mpi/fortran/mpif-h/ialltoallw_f.c | 32 +++++++++++++++----------- 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/ompi/mpi/fortran/mpif-h/alltoallw_f.c b/ompi/mpi/fortran/mpif-h/alltoallw_f.c index cb2328cf972..581eb7288e1 100644 --- a/ompi/mpi/fortran/mpif-h/alltoallw_f.c +++ b/ompi/mpi/fortran/mpif-h/alltoallw_f.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science + * Copyright (c) 2015-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -23,6 +23,7 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" +#include "ompi/communicator/communicator.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -82,20 +83,22 @@ void ompi_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts, OMPI_ARRAY_NAME_DECL(rdispls); c_comm = PMPI_Comm_f2c(*comm); - PMPI_Comm_size(c_comm, &size); + size = OMPI_COMM_IS_INTER(c_comm)?ompi_comm_remote_size(c_comm):ompi_comm_size(c_comm); - c_sendtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); - c_recvtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); + if (!OMPI_IS_FORTRAN_IN_PLACE(sendbuf)) { + c_sendtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); + OMPI_ARRAY_FINT_2_INT(sendcounts, size); + OMPI_ARRAY_FINT_2_INT(sdispls, size); + for (int i=0; i 0) { - c_sendtypes[size - 1] = PMPI_Type_f2c(sendtypes[size - 1]); - c_recvtypes[size - 1] = PMPI_Type_f2c(recvtypes[size - 1]); - --size; + for (int i=0; i 0) { - c_sendtypes[size - 1] = PMPI_Type_f2c(sendtypes[size - 1]); - 
c_recvtypes[size - 1] = PMPI_Type_f2c(recvtypes[size - 1]); - --size; + for (int i=0; i Date: Wed, 17 Jul 2019 09:28:43 +0900 Subject: [PATCH 40/94] fortran/mpif-h: fix [i]alltoallw bindings Fix a regression introduced in open-mpi/ompi@cdaed89d0481e8352a6c1a6cb8f71a250ea9352b Fixes CID 1451610, 1451611 and 1451612 Signed-off-by: Gilles Gouaillardet (cherry picked from commit open-mpi/ompi@ed703bec1bcfc5425598ccc197a3735e6cef19d8) --- ompi/mpi/fortran/mpif-h/alltoallw_f.c | 4 ++-- ompi/mpi/fortran/mpif-h/ialltoallw_f.c | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/ompi/mpi/fortran/mpif-h/alltoallw_f.c b/ompi/mpi/fortran/mpif-h/alltoallw_f.c index 581eb7288e1..55b782a7928 100644 --- a/ompi/mpi/fortran/mpif-h/alltoallw_f.c +++ b/ompi/mpi/fortran/mpif-h/alltoallw_f.c @@ -75,7 +75,7 @@ void ompi_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *comm, MPI_Fint *ierr) { MPI_Comm c_comm; - MPI_Datatype *c_sendtypes, *c_recvtypes; + MPI_Datatype *c_sendtypes = NULL, *c_recvtypes; int size, c_ierr; OMPI_ARRAY_NAME_DECL(sendcounts); OMPI_ARRAY_NAME_DECL(sdispls); @@ -119,7 +119,7 @@ void ompi_alltoallw_f(char *sendbuf, MPI_Fint *sendcounts, OMPI_ARRAY_FINT_2_INT_CLEANUP(sdispls); OMPI_ARRAY_FINT_2_INT_CLEANUP(recvcounts); OMPI_ARRAY_FINT_2_INT_CLEANUP(rdispls); - if (MPI_IN_PLACE != sendbuf) { + if (NULL != c_sendtypes) { free(c_sendtypes); } free(c_recvtypes); diff --git a/ompi/mpi/fortran/mpif-h/ialltoallw_f.c b/ompi/mpi/fortran/mpif-h/ialltoallw_f.c index 2aed76900e8..75f8262bef5 100644 --- a/ompi/mpi/fortran/mpif-h/ialltoallw_f.c +++ b/ompi/mpi/fortran/mpif-h/ialltoallw_f.c @@ -75,7 +75,7 @@ void ompi_ialltoallw_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr) { MPI_Comm c_comm; - MPI_Datatype *c_sendtypes, *c_recvtypes; + MPI_Datatype *c_sendtypes = NULL, *c_recvtypes; MPI_Request c_request; int size, c_ierr; OMPI_ARRAY_NAME_DECL(sendcounts); @@ -101,7 +101,6 @@ void ompi_ialltoallw_f(char 
*sendbuf, MPI_Fint *sendcounts, for (int i=0; i Date: Wed, 17 Jul 2019 09:30:09 +0900 Subject: [PATCH 41/94] pcollreq/mpif-h: fix MPIX_Alltoallw_init() binding Signed-off-by: Gilles Gouaillardet (cherry picked from commit open-mpi/ompi@b71af0eca0ed8b5acf2bdddc720e163cf491422a) --- .../mpiext/pcollreq/mpif-h/alltoallw_init_f.c | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/ompi/mpiext/pcollreq/mpif-h/alltoallw_init_f.c b/ompi/mpiext/pcollreq/mpif-h/alltoallw_init_f.c index 183d739f797..a90047c093f 100644 --- a/ompi/mpiext/pcollreq/mpif-h/alltoallw_init_f.c +++ b/ompi/mpiext/pcollreq/mpif-h/alltoallw_init_f.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2015-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -23,6 +23,7 @@ #include "ompi/mpi/fortran/mpif-h/bindings.h" #include "ompi/mpi/fortran/base/constants.h" +#include "ompi/communicator/communicator.h" #include "ompi/mpiext/pcollreq/mpif-h/mpiext_pcollreq_prototypes.h" #if OMPI_BUILD_MPI_PROFILING @@ -85,22 +86,22 @@ void ompix_alltoallw_init_f(char *sendbuf, MPI_Fint *sendcounts, OMPI_ARRAY_NAME_DECL(rdispls); c_comm = PMPI_Comm_f2c(*comm); - PMPI_Comm_size(c_comm, &size); + size = OMPI_COMM_IS_INTER(c_comm)?ompi_comm_remote_size(c_comm):ompi_comm_size(c_comm); + + if (!OMPI_IS_FORTRAN_IN_PLACE(sendbuf)) { + c_sendtypes = (MPI_Datatype *) malloc(size * sizeof(MPI_Datatype)); + OMPI_ARRAY_FINT_2_INT(sendcounts, size); + OMPI_ARRAY_FINT_2_INT(sdispls, size); + for (int i=0; i 0) { - c_sendtypes[size - 1] = PMPI_Type_f2c(sendtypes[size - 1]); - c_recvtypes[size - 1] = PMPI_Type_f2c(recvtypes[size - 1]); - --size; + for (int i=0; i Date: Tue, 23 Jul 2019 08:45:17 +0900 Subject: [PATCH 42/94] pcollreq/mpif-h: fix MPIX_Alltoallw_init() binding These issues were introduced in the recent commit b71af0eca0. This commit fixes Coverity CID 1451661 and 1451660. Though `c_info` part was an actual bug, the `c_sendtypes` part was not. 
Signed-off-by: KAWASHIMA Takahiro (cherry picked from commit open-mpi/ompi@facf8c5e98b22a615d50a56f0a2ae94515ef0bad) --- ompi/mpiext/pcollreq/mpif-h/alltoallw_init_f.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ompi/mpiext/pcollreq/mpif-h/alltoallw_init_f.c b/ompi/mpiext/pcollreq/mpif-h/alltoallw_init_f.c index a90047c093f..0fae1e194db 100644 --- a/ompi/mpiext/pcollreq/mpif-h/alltoallw_init_f.c +++ b/ompi/mpiext/pcollreq/mpif-h/alltoallw_init_f.c @@ -76,7 +76,7 @@ void ompix_alltoallw_init_f(char *sendbuf, MPI_Fint *sendcounts, MPI_Fint *comm, MPI_Fint *info, MPI_Fint *request, MPI_Fint *ierr) { MPI_Comm c_comm; - MPI_Datatype *c_sendtypes, *c_recvtypes; + MPI_Datatype *c_sendtypes = NULL, *c_recvtypes; MPI_Info c_info; MPI_Request c_request; int size, c_ierr; @@ -86,6 +86,7 @@ void ompix_alltoallw_init_f(char *sendbuf, MPI_Fint *sendcounts, OMPI_ARRAY_NAME_DECL(rdispls); c_comm = PMPI_Comm_f2c(*comm); + c_info = PMPI_Info_f2c(*info); size = OMPI_COMM_IS_INTER(c_comm)?ompi_comm_remote_size(c_comm):ompi_comm_size(c_comm); if (!OMPI_IS_FORTRAN_IN_PLACE(sendbuf)) { From 0422b23f3509b98ad5bcbff1927f93bfdc37cec6 Mon Sep 17 00:00:00 2001 From: Austen Lauria Date: Thu, 18 Jul 2019 18:45:50 -0400 Subject: [PATCH 43/94] Try to prevent the compiler from optimizing out MPIR_Breakpoint(). Signed-off-by: Austen Lauria (cherry picked from commit 00106f5ac96a3d9e6288ec07dc47e325897cd5f8) Signed-off-by: Austen Lauria --- orte/orted/orted_submit.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/orte/orted/orted_submit.c b/orte/orted/orted_submit.c index ccc089e51cb..33eddc5818d 100644 --- a/orte/orted/orted_submit.c +++ b/orte/orted/orted_submit.c @@ -175,12 +175,30 @@ static void orte_debugger_init_before_spawn(orte_job_t *jdata); ORTE_DECLSPEC void* __opal_attribute_optnone__ MPIR_Breakpoint(void); +/* + * Attempt to prevent the compiler from optimizing out + * MPIR_Breakpoint(). 
+ * + * Some older versions of automake can add -O3 to every + * file via CFLAGS (which was demonstrated in automake v1.13.4), + * so there is a possibility that the compiler will see + * this function as a NOOP and optimize it out on older versions. + * While using the current/recommended version of automake + * does not do this, the following will help those + * stuck with an older version, as well as guard against + * future regressions. + * + * See the following git issue for more discussion: + * https://github.com/open-mpi/ompi/issues/5501 + */ +static volatile void* volatile noop_mpir_breakpoint_ptr = NULL; + /* * Breakpoint function for parallel debuggers */ void* MPIR_Breakpoint(void) { - return NULL; + return noop_mpir_breakpoint_ptr; } /* local objects */ From 3d5b7b4a1b5473c0aa235baaca1388d8790804b5 Mon Sep 17 00:00:00 2001 From: Mikhail Brinskii Date: Tue, 2 Jul 2019 16:04:03 +0300 Subject: [PATCH 44/94] COLL/TUNED: Update alltoall selection rule for mlx Use linear with sync alltoall algorithm for certain message/comm size ranges. Does not affect default fixed decision, unless HPCX (with its custom parameters) is used or corresponding mca is set. 
Signed-off-by: Mikhail Brinskii (cherry picked from commit 404c4800688548b021bda68bdf10792424e6b1c5) --- contrib/platform/mellanox/optimized.conf | 5 ++++ ompi/mca/coll/tuned/coll_tuned.h | 3 +++ .../coll/tuned/coll_tuned_alltoall_decision.c | 27 ++++++++++++++----- ompi/mca/coll/tuned/coll_tuned_component.c | 7 +++++ .../coll/tuned/coll_tuned_decision_fixed.c | 6 +++++ 5 files changed, 41 insertions(+), 7 deletions(-) diff --git a/contrib/platform/mellanox/optimized.conf b/contrib/platform/mellanox/optimized.conf index c58428cf6ad..986db6f6b39 100644 --- a/contrib/platform/mellanox/optimized.conf +++ b/contrib/platform/mellanox/optimized.conf @@ -78,3 +78,8 @@ opal_event_include=epoll bml_r2_show_unreach_errors = 0 +# alltoall algorithm selection settings for tuned coll mca +coll_tuned_alltoall_large_msg = 250000 +coll_tuned_alltoall_min_procs = 2048 +coll_tuned_alltoall_algorithm_max_requests = 8 + diff --git a/ompi/mca/coll/tuned/coll_tuned.h b/ompi/mca/coll/tuned/coll_tuned.h index d4b201bc7a3..7ae039c9809 100644 --- a/ompi/mca/coll/tuned/coll_tuned.h +++ b/ompi/mca/coll/tuned/coll_tuned.h @@ -38,6 +38,9 @@ extern int ompi_coll_tuned_init_chain_fanout; extern int ompi_coll_tuned_init_max_requests; extern int ompi_coll_tuned_alltoall_small_msg; extern int ompi_coll_tuned_alltoall_intermediate_msg; +extern int ompi_coll_tuned_alltoall_large_msg; +extern int ompi_coll_tuned_alltoall_min_procs; +extern int ompi_coll_tuned_alltoall_max_reqs; /* forced algorithm choices */ /* this structure is for storing the indexes to the forced algorithm mca params... 
*/ diff --git a/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c b/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c index 2ef1e6b9038..86c16be5352 100644 --- a/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c +++ b/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c @@ -28,7 +28,6 @@ /* alltoall algorithm variables */ static int coll_tuned_alltoall_forced_algorithm = 0; static int coll_tuned_alltoall_segment_size = 0; -static int coll_tuned_alltoall_max_requests; static int coll_tuned_alltoall_tree_fanout; static int coll_tuned_alltoall_chain_fanout; @@ -115,7 +114,22 @@ int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm MCA_BASE_VAR_SCOPE_ALL, &coll_tuned_alltoall_chain_fanout); - coll_tuned_alltoall_max_requests = 0; /* no limit for alltoall by default */ + (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, + "alltoall_large_msg", + "threshold (if supported) to decide if large MSGs alltoall algorithm will be used", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_6, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_coll_tuned_alltoall_large_msg); + + (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, + "alltoall_min_procs", + "threshold (if supported) to decide if many processes alltoall algorithm will be used", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_6, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_coll_tuned_alltoall_min_procs); + mca_param_indices->max_requests_param_index = mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, "alltoall_algorithm_max_requests", @@ -123,17 +137,16 @@ int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL, - &coll_tuned_alltoall_max_requests); + &ompi_coll_tuned_alltoall_max_reqs); if (mca_param_indices->max_requests_param_index < 0) { return 
mca_param_indices->max_requests_param_index; } - if (coll_tuned_alltoall_max_requests < 0) { + if (ompi_coll_tuned_alltoall_max_reqs < 0) { if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) { - opal_output( 0, "Maximum outstanding requests must be positive number greater than 1. Switching to system level default %d \n", - ompi_coll_tuned_init_max_requests ); + opal_output( 0, "Maximum outstanding requests must be positive number greater than 1. Switching to 0 \n"); } - coll_tuned_alltoall_max_requests = 0; + ompi_coll_tuned_alltoall_max_reqs = 0; } return (MPI_SUCCESS); diff --git a/ompi/mca/coll/tuned/coll_tuned_component.c b/ompi/mca/coll/tuned/coll_tuned_component.c index be0d14a988f..3de5aedfe29 100644 --- a/ompi/mca/coll/tuned/coll_tuned_component.c +++ b/ompi/mca/coll/tuned/coll_tuned_component.c @@ -57,6 +57,13 @@ int ompi_coll_tuned_init_max_requests = 128; int ompi_coll_tuned_alltoall_small_msg = 200; int ompi_coll_tuned_alltoall_intermediate_msg = 3000; +/* Set it to intermediate value by default, so it does not affect default + * algorithm selection. Changing this value will force using linear with sync + * algorithm on certain message sizes. 
*/ +int ompi_coll_tuned_alltoall_large_msg = 3000; +int ompi_coll_tuned_alltoall_min_procs = 0; /* not used by default */ +int ompi_coll_tuned_alltoall_max_reqs = 0; /* no limit for alltoall by default */ + /* forced alogrithm variables */ /* indices for the MCA parameters */ coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT] = {{0}}; diff --git a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c index 0150fcc3b49..2518afee981 100644 --- a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c +++ b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c @@ -136,6 +136,12 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(const void *sbuf, int scount, return ompi_coll_base_alltoall_intra_basic_linear(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module); + } else if ((block_dsize < (size_t) ompi_coll_tuned_alltoall_large_msg) && + (communicator_size <= ompi_coll_tuned_alltoall_min_procs)) { + return ompi_coll_base_alltoall_intra_linear_sync(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module, + ompi_coll_tuned_alltoall_max_reqs); } return ompi_coll_base_alltoall_intra_pairwise(sbuf, scount, sdtype, From b9998a14dc2a5c87b55405d753a28656c89a9851 Mon Sep 17 00:00:00 2001 From: Mikhail Brinskii Date: Wed, 24 Jul 2019 10:23:38 +0000 Subject: [PATCH 45/94] COLL/TUNED: Minor var names/comments fixes Signed-off-by: Mikhail Brinskii (cherry picked from commit 65618f8db848613c95cbe112033df94721d326a8) --- ompi/mca/coll/tuned/coll_tuned.h | 2 +- ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c | 10 +++++----- ompi/mca/coll/tuned/coll_tuned_component.c | 10 +++++----- ompi/mca/coll/tuned/coll_tuned_decision_fixed.c | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ompi/mca/coll/tuned/coll_tuned.h b/ompi/mca/coll/tuned/coll_tuned.h index 7ae039c9809..d6fc4b89bde 100644 --- a/ompi/mca/coll/tuned/coll_tuned.h +++ b/ompi/mca/coll/tuned/coll_tuned.h @@ -40,7 +40,7 @@ extern int 
ompi_coll_tuned_alltoall_small_msg; extern int ompi_coll_tuned_alltoall_intermediate_msg; extern int ompi_coll_tuned_alltoall_large_msg; extern int ompi_coll_tuned_alltoall_min_procs; -extern int ompi_coll_tuned_alltoall_max_reqs; +extern int ompi_coll_tuned_alltoall_max_requests; /* forced algorithm choices */ /* this structure is for storing the indexes to the forced algorithm mca params... */ diff --git a/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c b/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c index 86c16be5352..b63037e1237 100644 --- a/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c +++ b/ompi/mca/coll/tuned/coll_tuned_alltoall_decision.c @@ -116,7 +116,7 @@ int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, "alltoall_large_msg", - "threshold (if supported) to decide if large MSGs alltoall algorithm will be used", + "use pairwise exchange algorithm for messages larger than this value", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_6, MCA_BASE_VAR_SCOPE_READONLY, @@ -124,7 +124,7 @@ int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, "alltoall_min_procs", - "threshold (if supported) to decide if many processes alltoall algorithm will be used", + "use pairwise exchange algorithm for communicators larger than this value", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_6, MCA_BASE_VAR_SCOPE_READONLY, @@ -137,16 +137,16 @@ int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL, - &ompi_coll_tuned_alltoall_max_reqs); + &ompi_coll_tuned_alltoall_max_requests); if (mca_param_indices->max_requests_param_index < 0) { return mca_param_indices->max_requests_param_index; } - if 
(ompi_coll_tuned_alltoall_max_reqs < 0) { + if (ompi_coll_tuned_alltoall_max_requests < 0) { if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) { opal_output( 0, "Maximum outstanding requests must be positive number greater than 1. Switching to 0 \n"); } - ompi_coll_tuned_alltoall_max_reqs = 0; + ompi_coll_tuned_alltoall_max_requests = 0; } return (MPI_SUCCESS); diff --git a/ompi/mca/coll/tuned/coll_tuned_component.c b/ompi/mca/coll/tuned/coll_tuned_component.c index 3de5aedfe29..25e9bc77a0d 100644 --- a/ompi/mca/coll/tuned/coll_tuned_component.c +++ b/ompi/mca/coll/tuned/coll_tuned_component.c @@ -57,12 +57,12 @@ int ompi_coll_tuned_init_max_requests = 128; int ompi_coll_tuned_alltoall_small_msg = 200; int ompi_coll_tuned_alltoall_intermediate_msg = 3000; -/* Set it to intermediate value by default, so it does not affect default - * algorithm selection. Changing this value will force using linear with sync - * algorithm on certain message sizes. */ +/* Set it to the same value as intermediate msg by default, so it does not affect + * default algorithm selection. Changing this value will force using linear with + * sync algorithm on certain message sizes. 
*/ int ompi_coll_tuned_alltoall_large_msg = 3000; -int ompi_coll_tuned_alltoall_min_procs = 0; /* not used by default */ -int ompi_coll_tuned_alltoall_max_reqs = 0; /* no limit for alltoall by default */ +int ompi_coll_tuned_alltoall_min_procs = 0; /* disable by default */ +int ompi_coll_tuned_alltoall_max_requests = 0; /* no limit for alltoall by default */ /* forced alogrithm variables */ /* indices for the MCA parameters */ diff --git a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c index 2518afee981..97560c5c089 100644 --- a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c +++ b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c @@ -141,7 +141,7 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(const void *sbuf, int scount, return ompi_coll_base_alltoall_intra_linear_sync(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module, - ompi_coll_tuned_alltoall_max_reqs); + ompi_coll_tuned_alltoall_max_requests); } return ompi_coll_base_alltoall_intra_pairwise(sbuf, scount, sdtype, From 359cdf2b539ffd5b2266749c8ddfb56cde3ba9af Mon Sep 17 00:00:00 2001 From: "Nysal Jan K.A" Date: Wed, 24 Jul 2019 13:04:41 +0530 Subject: [PATCH 46/94] osc/ucx: Fix data corruption with non-contiguous accumulates Signed-off-by: Nysal Jan K.A (cherry picked from commit 3529d447020684ab305411caa97423826bb40906) --- ompi/mca/osc/ucx/osc_ucx_comm.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ompi/mca/osc/ucx/osc_ucx_comm.c b/ompi/mca/osc/ucx/osc_ucx_comm.c index 55af123fbb4..adedae5c3ec 100644 --- a/ompi/mca/osc/ucx/osc_ucx_comm.c +++ b/ompi/mca/osc/ucx/osc_ucx_comm.c @@ -566,12 +566,13 @@ int ompi_osc_ucx_accumulate(const void *origin_addr, int origin_count, if ((op != &ompi_mpi_op_maxloc.op && op != &ompi_mpi_op_minloc.op) || ompi_datatype_is_contiguous_memory_layout(temp_dt, temp_count)) { size_t temp_size; + char *curr_temp_addr = (char *)temp_addr; ompi_datatype_type_size(temp_dt, &temp_size); while 
(origin_ucx_iov_idx < origin_ucx_iov_count) { int curr_count = origin_ucx_iov[origin_ucx_iov_idx].len / temp_size; ompi_op_reduce(op, origin_ucx_iov[origin_ucx_iov_idx].addr, - temp_addr, curr_count, temp_dt); - temp_addr = (void *)((char *)temp_addr + curr_count * temp_size); + curr_temp_addr, curr_count, temp_dt); + curr_temp_addr += curr_count * temp_size; origin_ucx_iov_idx++; } } else { @@ -811,12 +812,13 @@ int ompi_osc_ucx_get_accumulate(const void *origin_addr, int origin_count, if ((op != &ompi_mpi_op_maxloc.op && op != &ompi_mpi_op_minloc.op) || ompi_datatype_is_contiguous_memory_layout(temp_dt, temp_count)) { size_t temp_size; + char *curr_temp_addr = (char *)temp_addr; ompi_datatype_type_size(temp_dt, &temp_size); while (origin_ucx_iov_idx < origin_ucx_iov_count) { int curr_count = origin_ucx_iov[origin_ucx_iov_idx].len / temp_size; ompi_op_reduce(op, origin_ucx_iov[origin_ucx_iov_idx].addr, - temp_addr, curr_count, temp_dt); - temp_addr = (void *)((char *)temp_addr + curr_count * temp_size); + curr_temp_addr, curr_count, temp_dt); + curr_temp_addr += curr_count * temp_size; origin_ucx_iov_idx++; } } else { From f68b06e9ee01d79469d691019e4aca7535ceb4a2 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Thu, 9 May 2019 16:27:49 -0400 Subject: [PATCH 47/94] Fix incorrect behavior with length == 0 Fixes #6575. 
Signed-off-by: George Bosilca --- .../ompi_datatype_create_contiguous.c | 13 ++- ompi/datatype/ompi_datatype_create_darray.c | 4 +- ompi/datatype/ompi_datatype_create_indexed.c | 79 +++++++++---------- ompi/datatype/ompi_datatype_create_struct.c | 38 ++++----- ompi/datatype/ompi_datatype_create_vector.c | 21 ++--- 5 files changed, 68 insertions(+), 87 deletions(-) diff --git a/ompi/datatype/ompi_datatype_create_contiguous.c b/ompi/datatype/ompi_datatype_create_contiguous.c index fb44673ef5c..6a287caa41c 100644 --- a/ompi/datatype/ompi_datatype_create_contiguous.c +++ b/ompi/datatype/ompi_datatype_create_contiguous.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -29,13 +29,12 @@ int32_t ompi_datatype_create_contiguous( int count, const ompi_datatype_t* oldTy { ompi_datatype_t* pdt; - if( 0 == count ) { - pdt = ompi_datatype_create( 0 ); - ompi_datatype_add( pdt, &ompi_mpi_datatype_null.dt, 0, 0, 0 ); - } else { - pdt = ompi_datatype_create( oldType->super.desc.used + 2 ); - opal_datatype_add( &(pdt->super), &(oldType->super), count, 0, (oldType->super.ub - oldType->super.lb) ); + if( (0 == count) || (0 == oldType->super.size) ) { + return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } + + pdt = ompi_datatype_create( oldType->super.desc.used + 2 ); + opal_datatype_add( &(pdt->super), &(oldType->super), count, 0, (oldType->super.ub - oldType->super.lb) ); *newType = pdt; return OMPI_SUCCESS; } diff --git a/ompi/datatype/ompi_datatype_create_darray.c b/ompi/datatype/ompi_datatype_create_darray.c index a245dcebce4..e0292755c4b 100644 --- 
a/ompi/datatype/ompi_datatype_create_darray.c +++ b/ompi/datatype/ompi_datatype_create_darray.c @@ -192,9 +192,7 @@ int32_t ompi_datatype_create_darray(int size, if (ndims < 1) { /* Don't just return MPI_DATATYPE_NULL as that can't be MPI_TYPE_FREE()ed, and that seems bad */ - *newtype = ompi_datatype_create(0); - ompi_datatype_add(*newtype, &ompi_mpi_datatype_null.dt, 0, 0, 0); - return MPI_SUCCESS; + return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newtype); } rc = ompi_datatype_type_extent(oldtype, &orig_extent); diff --git a/ompi/datatype/ompi_datatype_create_indexed.c b/ompi/datatype/ompi_datatype_create_indexed.c index 457efb1e6ff..e72b41afc7d 100644 --- a/ompi/datatype/ompi_datatype_create_indexed.c +++ b/ompi/datatype/ompi_datatype_create_indexed.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -34,24 +34,28 @@ int32_t ompi_datatype_create_indexed( int count, const int* pBlockLength, const int* pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { - ompi_datatype_t* pdt; - int i; ptrdiff_t extent, disp, endat; + ompi_datatype_t* pdt; size_t dLength; + int i; - if( 0 == count ) { + /* ignore all cases that lead to an empty type */ + ompi_datatype_type_size(oldType, &dLength); + for( i = 0; (i < count) && (0 == pBlockLength[i]); i++ ); /* find first non zero */ + if( (i == count) || (0 == dLength) ) { return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } - disp = pDisp[0]; - dLength = pBlockLength[0]; + disp = pDisp[i]; + dLength = pBlockLength[i]; endat = disp + dLength; ompi_datatype_type_extent( oldType, &extent ); - pdt = ompi_datatype_create( count * (2 + oldType->super.desc.used) ); - for( i = 1; i < count; i++ ) { - if( endat == pDisp[i] ) { - /* contiguous with the previsious */ + pdt = ompi_datatype_create( (count - i) * (2 + oldType->super.desc.used) ); + for( i += 1; i < count; i++ ) { + if( 0 == pBlockLength[i] ) /* ignore empty length */ + continue; + if( endat == pDisp[i] ) { /* contiguous with the previsious */ dLength += pBlockLength[i]; endat += pBlockLength[i]; } else { @@ -71,26 +75,28 @@ int32_t ompi_datatype_create_indexed( int count, const int* pBlockLength, const int32_t ompi_datatype_create_hindexed( int count, const int* pBlockLength, const ptrdiff_t* pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { - ompi_datatype_t* pdt; - int i; ptrdiff_t extent, disp, endat; + ompi_datatype_t* pdt; size_t dLength; + int i; - if( 0 == count ) { - *newType = ompi_datatype_create( 0 ); - ompi_datatype_add( *newType, &ompi_mpi_datatype_null.dt, 0, 0, 0); - return OMPI_SUCCESS; + /* ignore all cases that lead to an empty type */ + ompi_datatype_type_size(oldType, &dLength); + for( i = 0; (i < count) && (0 == pBlockLength[i]); 
i++ ); /* find first non zero */ + if( (i == count) || (0 == dLength) ) { + return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } - ompi_datatype_type_extent( oldType, &extent ); - pdt = ompi_datatype_create( count * (2 + oldType->super.desc.used) ); - disp = pDisp[0]; - dLength = pBlockLength[0]; + disp = pDisp[i]; + dLength = pBlockLength[i]; endat = disp + dLength * extent; + ompi_datatype_type_extent( oldType, &extent ); - for( i = 1; i < count; i++ ) { - if( endat == pDisp[i] ) { - /* contiguous with the previsious */ + pdt = ompi_datatype_create( (count - i) * (2 + oldType->super.desc.used) ); + for( i += 1; i < count; i++ ) { + if( 0 == pBlockLength[i] ) /* ignore empty length */ + continue; + if( endat == pDisp[i] ) { /* contiguous with the previsious */ dLength += pBlockLength[i]; endat += pBlockLength[i] * extent; } else { @@ -110,21 +116,15 @@ int32_t ompi_datatype_create_hindexed( int count, const int* pBlockLength, const int32_t ompi_datatype_create_indexed_block( int count, int bLength, const int* pDisp, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { - ompi_datatype_t* pdt; - int i; ptrdiff_t extent, disp, endat; + ompi_datatype_t* pdt; size_t dLength; + int i; - ompi_datatype_type_extent( oldType, &extent ); if( (count == 0) || (bLength == 0) ) { - if( 0 == count ) { - return ompi_datatype_duplicate(&ompi_mpi_datatype_null.dt, newType); - } else { - *newType = ompi_datatype_create(1); - ompi_datatype_add( *newType, oldType, 0, pDisp[0] * extent, extent ); - return OMPI_SUCCESS; - } + return ompi_datatype_duplicate(&ompi_mpi_datatype_null.dt, newType); } + ompi_datatype_type_extent( oldType, &extent ); pdt = ompi_datatype_create( count * (2 + oldType->super.desc.used) ); disp = pDisp[0]; dLength = bLength; @@ -150,20 +150,15 @@ int32_t ompi_datatype_create_indexed_block( int count, int bLength, const int* p int32_t ompi_datatype_create_hindexed_block( int count, int bLength, const ptrdiff_t* pDisp, const 
ompi_datatype_t* oldType, ompi_datatype_t** newType ) { - ompi_datatype_t* pdt; - int i; ptrdiff_t extent, disp, endat; + ompi_datatype_t* pdt; size_t dLength; + int i; - ompi_datatype_type_extent( oldType, &extent ); if( (count == 0) || (bLength == 0) ) { - *newType = ompi_datatype_create(1); - if( 0 == count ) - ompi_datatype_add( *newType, &ompi_mpi_datatype_null.dt, 0, 0, 0 ); - else - ompi_datatype_add( *newType, oldType, 0, pDisp[0] * extent, extent ); - return OMPI_SUCCESS; + return ompi_datatype_duplicate(&ompi_mpi_datatype_null.dt, newType); } + ompi_datatype_type_extent( oldType, &extent ); pdt = ompi_datatype_create( count * (2 + oldType->super.desc.used) ); disp = pDisp[0]; dLength = bLength; diff --git a/ompi/datatype/ompi_datatype_create_struct.c b/ompi/datatype/ompi_datatype_create_struct.c index 98daa8bacbb..9c78f53fee3 100644 --- a/ompi/datatype/ompi_datatype_create_struct.c +++ b/ompi/datatype/ompi_datatype_create_struct.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -31,27 +31,27 @@ int32_t ompi_datatype_create_struct( int count, const int* pBlockLength, const ptrdiff_t* pDisp, ompi_datatype_t* const * pTypes, ompi_datatype_t** newType ) { - int i; ptrdiff_t disp = 0, endto, lastExtent, lastDisp; - int lastBlock; ompi_datatype_t *pdt, *lastType; + int lastBlock; + int i, start_from; - if( 0 == count ) { - *newType = ompi_datatype_create( 0 ); - ompi_datatype_add( *newType, &ompi_mpi_datatype_null.dt, 0, 0, 0); - return OMPI_SUCCESS; + /* Find first non-zero length element */ + for( i = 0; (i < count) && (0 == pBlockLength[i]); i++ ); + if( i == count ) { /* either nothing or nothing relevant */ + return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } - - /* if we compute the total number of elements before we can + /* compute the total number of elements before we can * avoid increasing the size of the desc array often. */ - lastType = (ompi_datatype_t*)pTypes[0]; - lastBlock = pBlockLength[0]; + start_from = i; + lastType = (ompi_datatype_t*)pTypes[start_from]; + lastBlock = pBlockLength[start_from]; lastExtent = lastType->super.ub - lastType->super.lb; - lastDisp = pDisp[0]; - endto = pDisp[0] + lastExtent * lastBlock; + lastDisp = pDisp[start_from]; + endto = pDisp[start_from] + lastExtent * lastBlock; - for( i = 1; i < count; i++ ) { + for( i = (start_from + 1); i < count; i++ ) { if( (pTypes[i] == lastType) && (pDisp[i] == endto) ) { lastBlock += pBlockLength[i]; endto = lastDisp + lastBlock * lastExtent; @@ -68,16 +68,16 @@ int32_t ompi_datatype_create_struct( int count, const int* pBlockLength, const p disp += lastType->super.desc.used; if( lastBlock != 1 ) disp += 2; - lastType = (ompi_datatype_t*)pTypes[0]; - lastBlock = pBlockLength[0]; + lastType = (ompi_datatype_t*)pTypes[start_from]; + lastBlock = pBlockLength[start_from]; lastExtent = lastType->super.ub - lastType->super.lb; - lastDisp = pDisp[0]; - endto = pDisp[0] + 
lastExtent * lastBlock; + lastDisp = pDisp[start_from]; + endto = pDisp[start_from] + lastExtent * lastBlock; pdt = ompi_datatype_create( (int32_t)disp ); /* Do again the same loop but now add the elements */ - for( i = 1; i < count; i++ ) { + for( i = (start_from + 1); i < count; i++ ) { if( (pTypes[i] == lastType) && (pDisp[i] == endto) ) { lastBlock += pBlockLength[i]; endto = lastDisp + lastBlock * lastExtent; diff --git a/ompi/datatype/ompi_datatype_create_vector.c b/ompi/datatype/ompi_datatype_create_vector.c index 1de8df4d2d2..c4829a4b54c 100644 --- a/ompi/datatype/ompi_datatype_create_vector.c +++ b/ompi/datatype/ompi_datatype_create_vector.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -28,23 +28,14 @@ #include "ompi/datatype/ompi_datatype.h" -/* Open questions ... - * - how to improuve the handling of these vectors (creating a temporary datatype - * can be ONLY a initial solution. 
- */ - int32_t ompi_datatype_create_vector( int count, int bLength, int stride, const ompi_datatype_t* oldType, ompi_datatype_t** newType ) { ompi_datatype_t *pTempData, *pData; ptrdiff_t extent = oldType->super.ub - oldType->super.lb; - - if( 0 == count ) { - *newType = ompi_datatype_create( 0 ); - ompi_datatype_add( *newType, &ompi_mpi_datatype_null.dt, 0, 0, 0); - return OMPI_SUCCESS; + if( (0 == count) || (0 == bLength) ) { + return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } pData = ompi_datatype_create( oldType->super.desc.used + 2 ); @@ -72,10 +63,8 @@ int32_t ompi_datatype_create_hvector( int count, int bLength, ptrdiff_t stride, ompi_datatype_t *pTempData, *pData; ptrdiff_t extent = oldType->super.ub - oldType->super.lb; - if( 0 == count ) { - *newType = ompi_datatype_create( 0 ); - ompi_datatype_add( *newType, &ompi_mpi_datatype_null.dt, 0, 0, 0); - return OMPI_SUCCESS; + if( (0 == count) || (0 == bLength) ) { + return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } pTempData = ompi_datatype_create( oldType->super.desc.used + 2 ); From 4f754d01562340f66d4eee40913dca3786a38909 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 15 May 2019 23:41:22 -0400 Subject: [PATCH 48/94] Optimized datatype description. Move toward a base type of vector (count, type, blocklen, extent, disp) with disp and extent applying toward the count repetition and blocklen being a contiguous memory of type type. Implement 2 optimizations on this description used during type_commit: - collapse: successive similar datatype descriptions are collapsed together with an increased count. - fusion: fuse successive datatype descriptions in order to minimize the number of resulting memcpy during pack/unpack. Fixes at the OMPI datatype level including: - Fix the create_hindexed and vector creation. - Fix the handling of [get|set]_elements and _count. - Correctly compute the displacement for block indexed types.
- Support the MPI_LB and MPI_UB deprecation, aka. OMPI_ENABLE_MPI1_COMPAT. Signed-off-by: George Bosilca --- ompi/datatype/ompi_datatype.h | 2 +- ompi/datatype/ompi_datatype_create_indexed.c | 8 +- ompi/datatype/ompi_datatype_external.c | 3 +- opal/datatype/opal_convertor.c | 17 +- opal/datatype/opal_datatype.h | 40 ++- opal/datatype/opal_datatype_add.c | 55 +++- opal/datatype/opal_datatype_copy.h | 63 ++-- opal/datatype/opal_datatype_get_count.c | 10 +- opal/datatype/opal_datatype_internal.h | 26 +- opal/datatype/opal_datatype_module.c | 1 + opal/datatype/opal_datatype_monotonic.c | 31 +- opal/datatype/opal_datatype_optimize.c | 287 +++++++++---------- opal/datatype/opal_datatype_pack.h | 144 ++++++---- opal/datatype/opal_datatype_position.c | 85 +++++- opal/datatype/opal_datatype_unpack.h | 145 ++++++---- 15 files changed, 548 insertions(+), 369 deletions(-) diff --git a/ompi/datatype/ompi_datatype.h b/ompi/datatype/ompi_datatype.h index 8b48bc30973..f589c874b64 100644 --- a/ompi/datatype/ompi_datatype.h +++ b/ompi/datatype/ompi_datatype.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2009-2013 The University of Tennessee and The University + * Copyright (c) 2009-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. 
diff --git a/ompi/datatype/ompi_datatype_create_indexed.c b/ompi/datatype/ompi_datatype_create_indexed.c index e72b41afc7d..2684d9d7df0 100644 --- a/ompi/datatype/ompi_datatype_create_indexed.c +++ b/ompi/datatype/ompi_datatype_create_indexed.c @@ -87,10 +87,10 @@ int32_t ompi_datatype_create_hindexed( int count, const int* pBlockLength, const return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType); } + ompi_datatype_type_extent( oldType, &extent ); disp = pDisp[i]; dLength = pBlockLength[i]; endat = disp + dLength * extent; - ompi_datatype_type_extent( oldType, &extent ); pdt = ompi_datatype_create( (count - i) * (2 + oldType->super.desc.used) ); for( i += 1; i < count; i++ ) { @@ -162,17 +162,17 @@ int32_t ompi_datatype_create_hindexed_block( int count, int bLength, const ptrdi pdt = ompi_datatype_create( count * (2 + oldType->super.desc.used) ); disp = pDisp[0]; dLength = bLength; - endat = disp + dLength; + endat = disp + dLength * extent; for( i = 1; i < count; i++ ) { if( endat == pDisp[i] ) { /* contiguous with the previsious */ dLength += bLength; - endat += bLength; + endat += bLength * extent; } else { ompi_datatype_add( pdt, oldType, dLength, disp, extent ); disp = pDisp[i]; dLength = bLength; - endat = disp + bLength; + endat = disp + bLength * extent; } } ompi_datatype_add( pdt, oldType, dLength, disp, extent ); diff --git a/ompi/datatype/ompi_datatype_external.c b/ompi/datatype/ompi_datatype_external.c index d47531ef29e..53b907218cf 100644 --- a/ompi/datatype/ompi_datatype_external.c +++ b/ompi/datatype/ompi_datatype_external.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, @@ -26,7 +26,6 @@ #include #include "ompi/runtime/params.h" -#include "ompi/communicator/communicator.h" #include "ompi/datatype/ompi_datatype.h" #include "opal/datatype/opal_convertor.h" diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index ce889f7e959..631d3adab43 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2017 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -324,8 +324,9 @@ int32_t opal_convertor_unpack( opal_convertor_t* pConv, return pConv->fAdvance( pConv, iov, out_size, max_data ); } -static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t* pConvertor, - size_t starting_point, const size_t* sizes ) +static inline int +opal_convertor_create_stack_with_pos_contig( opal_convertor_t* pConvertor, + size_t starting_point, const size_t* sizes ) { dt_stack_t* pStack; /* pointer to the position on the stack */ const opal_datatype_t* pData = pConvertor->pDesc; @@ -349,7 +350,7 @@ static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t* pStack[0].disp = count * extent; /* now compute the number of pending bytes */ - count = starting_point - count * pData->size; + count = starting_point % pData->size; /** * We save the current displacement starting from the begining * of this data. 
@@ -370,9 +371,9 @@ static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t* return OPAL_SUCCESS; } -static inline -int opal_convertor_create_stack_at_begining( opal_convertor_t* convertor, - const size_t* sizes ) +static inline int +opal_convertor_create_stack_at_begining( opal_convertor_t* convertor, + const size_t* sizes ) { dt_stack_t* pStack = convertor->pStack; dt_elem_desc_t* pElems; @@ -402,7 +403,7 @@ int opal_convertor_create_stack_at_begining( opal_convertor_t* convertor, pStack[1].count = pElems[0].loop.loops; pStack[1].type = OPAL_DATATYPE_LOOP; } else { - pStack[1].count = pElems[0].elem.count; + pStack[1].count = pElems[0].elem.count * pElems[0].elem.blocklen; pStack[1].type = pElems[0].elem.common.type; } return OPAL_SUCCESS; diff --git a/opal/datatype/opal_datatype.h b/opal/datatype/opal_datatype.h index a836a5aae03..e1bc18c67f9 100644 --- a/opal/datatype/opal_datatype.h +++ b/opal/datatype/opal_datatype.h @@ -224,13 +224,41 @@ opal_datatype_is_contiguous_memory_layout( const opal_datatype_t* datatype, int3 } -OPAL_DECLSPEC void opal_datatype_dump( const opal_datatype_t* pData ); +OPAL_DECLSPEC void +opal_datatype_dump( const opal_datatype_t* pData ); + /* data creation functions */ -OPAL_DECLSPEC int32_t opal_datatype_clone( const opal_datatype_t * src_type, opal_datatype_t * dest_type ); -OPAL_DECLSPEC int32_t opal_datatype_create_contiguous( int count, const opal_datatype_t* oldType, opal_datatype_t** newType ); -OPAL_DECLSPEC int32_t opal_datatype_resize( opal_datatype_t* type, ptrdiff_t lb, ptrdiff_t extent ); -OPAL_DECLSPEC int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtAdd, size_t count, - ptrdiff_t disp, ptrdiff_t extent ); + +/** + * Create a duplicate of the source datatype. + */ +OPAL_DECLSPEC int32_t +opal_datatype_clone( const opal_datatype_t* src_type, + opal_datatype_t* dest_type ); +/** + * A contiguous array of identical datatypes. 
+ */ +OPAL_DECLSPEC int32_t +opal_datatype_create_contiguous( int count, const opal_datatype_t* oldType, + opal_datatype_t** newType ); +/** + * Add a new datatype to the base type description. The count is the number of + * repetitions of the same element to be added, and the extent is the extent + * of each element. The displacement is the initial displacement of the + * first element. + */ +OPAL_DECLSPEC int32_t +opal_datatype_add( opal_datatype_t* pdtBase, + const opal_datatype_t* pdtAdd, size_t count, + ptrdiff_t disp, ptrdiff_t extent ); + +/** + * Alter the lb and extent of an existing datatype in place. + */ +OPAL_DECLSPEC int32_t +opal_datatype_resize( opal_datatype_t* type, + ptrdiff_t lb, + ptrdiff_t extent ); static inline int32_t opal_datatype_type_lb( const opal_datatype_t* pData, ptrdiff_t* disp ) diff --git a/opal/datatype/opal_datatype_add.c b/opal/datatype/opal_datatype_add.c index 146ce12afe2..108b4e3d1be 100644 --- a/opal/datatype/opal_datatype_add.c +++ b/opal/datatype/opal_datatype_add.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2017 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -281,15 +281,23 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA if( (pdtAdd->flags & (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA)) == (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA) ) { if( NULL != pdtBase->ptypes ) pdtBase->ptypes[pdtAdd->id] += count; + + pLast->elem.common.flags = pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED); pLast->elem.common.type = pdtAdd->id; - pLast->elem.count = count; pLast->elem.disp = disp; - pLast->elem.extent = extent; - pdtBase->desc.used++; - pLast->elem.common.flags = pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED); - if( (extent != (ptrdiff_t)pdtAdd->size) && (count > 1) ) { /* gaps around the datatype */ - pLast->elem.common.flags &= ~(OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS); + pLast->elem.extent = count * extent; + /* assume predefined datatypes without extent, aka. contiguous */ + pLast->elem.count = 1; + pLast->elem.blocklen = count; + if( extent != (ptrdiff_t)pdtAdd->size ) { /* not contiguous: let's fix */ + pLast->elem.count = count; + pLast->elem.blocklen = 1; + pLast->elem.extent = extent; + if( count > 1 ) { /* gaps around the predefined datatype */ + pLast->elem.common.flags &= ~(OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS); + } } + pdtBase->desc.used++; } else { /* keep trace of the total number of basic datatypes in the datatype definition */ pdtBase->loops += pdtAdd->loops; @@ -299,13 +307,40 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) if( pdtAdd->ptypes[i] != 0 ) pdtBase->ptypes[i] += (count * pdtAdd->ptypes[i]); } - if( (1 == pdtAdd->desc.used) && (extent == (pdtAdd->ub - pdtAdd->lb)) && - (extent == pdtAdd->desc.desc[0].elem.extent) ){ + if( 1 == pdtAdd->desc.used ) { pLast->elem = pdtAdd->desc.desc[0].elem; - pLast->elem.count *= 
count; pLast->elem.disp += disp; + if( 1 == count ) { + /* Extent only has a meaning when there are multiple elements. Bail out */ + } else if( 1 == pLast->elem.count ) { + /* The size and true_extent of the added datatype are identical, signaling a datatype + * that is mostly contiguous with the exception of the initial and final gaps. These + * gaps do not matter here as they will be amended (the initial gaps being shifted by the + * new displacement and the final gap being replaced with the new gap). + */ + if( pdtAdd->desc.desc[0].elem.extent == extent ) { + /* pure bliss everything is fully contiguous and we can collapse + * everything by updating the blocklen and extent + */ + pLast->elem.blocklen *= count; + pLast->elem.extent *= count; + } else { + pLast->elem.count = count; + pLast->elem.extent = extent; + } + } else if( extent == (ptrdiff_t)(pLast->elem.count * pLast->elem.extent) ) { + /* It's just a repetition of the same element, increase the count */ + pLast->elem.count *= count; + } else { + /* No luck here, no optimization can be applied. Fall back to the + * normal case where we add a loop around the datatype. + */ + goto build_loop; + } pdtBase->desc.used++; } else { + +build_loop: /* if the extent of the datatype is the same as the extent of the loop * description of the datatype then we simply have to update the main loop.
*/ diff --git a/opal/datatype/opal_datatype_copy.h b/opal/datatype/opal_datatype_copy.h index 7aeac8e63ec..40f119a684d 100644 --- a/opal/datatype/opal_datatype_copy.h +++ b/opal/datatype/opal_datatype_copy.h @@ -48,37 +48,37 @@ static inline void _predefined_data( const dt_elem_desc_t* ELEM, unsigned char* DESTINATION, size_t* SPACE ) { - size_t _copy_count = (COUNT); - size_t _copy_blength; const ddt_elem_desc_t* _elem = &((ELEM)->elem); unsigned char* _source = (SOURCE) + _elem->disp; unsigned char* _destination = (DESTINATION) + _elem->disp; + size_t total_count = _elem->count * _elem->blocklen; + size_t do_now, do_now_bytes; - _copy_blength = opal_datatype_basicDatatypes[_elem->common.type]->size; + assert( (COUNT) == total_count); + assert( total_count <= ((*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size) ); - if( _copy_blength == (size_t)_elem->extent ) { - _copy_blength *= _copy_count; - OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _copy_blength, (SOURCE_BASE), - (DATATYPE), (TOTAL_COUNT) ); - /* the extent and the size of the basic datatype are equals */ - DO_DEBUG( opal_output( 0, "copy 1. %s( %p, %p, %" PRIsize_t " ) => space %" PRIsize_t "\n", - STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, _copy_blength, *(SPACE) ); ); - MEM_OP( _destination, _source, _copy_blength ); - _source += _copy_blength; - _destination += _copy_blength; - } else { - for(size_t _i = 0; _i < _copy_count; _i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _copy_blength, (SOURCE_BASE), - (DATATYPE), (TOTAL_COUNT) ); - DO_DEBUG( opal_output( 0, "copy 2. %s( %p, %p, %lu ) => space %lu\n", - STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, (unsigned long)_copy_blength, (unsigned long)(*(SPACE) - (_i * _copy_blength)) ); ); - MEM_OP( _destination, _source, _copy_blength ); - _source += _elem->extent; + /* We don't need a prologue and epilogue here as we are __always__ working + * with full copies of the data description.
+ */ + + /** + * Compute how many full blocklen we need to do and do them. + */ + do_now = _elem->count; + if( 0 != do_now ) { + do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size; + for(size_t _i = 0; _i < do_now; _i++ ) { + OPAL_DATATYPE_SAFEGUARD_POINTER( _source, do_now_bytes, (SOURCE_BASE), + (DATATYPE), (TOTAL_COUNT) ); + DO_DEBUG( opal_output( 0, "copy %s( %p, %p, %" PRIsize_t " ) => space %" PRIsize_t "\n", + STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, do_now_bytes, *(SPACE) ); ); + MEM_OP( _destination, _source, do_now_bytes ); _destination += _elem->extent; + _source += _elem->extent; + *(SPACE) -= do_now_bytes; } - _copy_blength *= _copy_count; + (COUNT) -= total_count; } - *(SPACE) -= _copy_blength; } static inline void _contiguous_loop( const dt_elem_desc_t* ELEM, @@ -147,12 +147,10 @@ static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, i if( (ptrdiff_t)datatype->size == extent ) { /* all contiguous == no gaps around */ size_t total_length = iov_len_local; size_t memop_chunk = opal_datatype_memop_block_size; + OPAL_DATATYPE_SAFEGUARD_POINTER( source, iov_len_local, + (unsigned char*)source_base, datatype, count ); while( total_length > 0 ) { if( memop_chunk > total_length ) memop_chunk = total_length; - OPAL_DATATYPE_SAFEGUARD_POINTER( destination, memop_chunk, - (unsigned char*)destination_base, datatype, count ); - OPAL_DATATYPE_SAFEGUARD_POINTER( source, memop_chunk, - (unsigned char*)source_base, datatype, count ); DO_DEBUG( opal_output( 0, "copy c1. 
%s( %p, %p, %lu ) => space %lu\n", STRINGIFY(MEM_OP_NAME), (void*)destination, (void*)source, (unsigned long)memop_chunk, (unsigned long)total_length ); ); MEM_OP( destination, source, memop_chunk ); @@ -184,17 +182,12 @@ static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, i pos_desc = 0; stack_pos = 0; - if( datatype->opt_desc.desc != NULL ) { - description = datatype->opt_desc.desc; - } else { + description = datatype->opt_desc.desc; + if( NULL == description ) { description = datatype->desc.desc; } - if( description[0].elem.common.type == OPAL_DATATYPE_LOOP ) - count_desc = description[0].loop.loops; - else - count_desc = description[0].elem.count; - pElem = &(description[pos_desc]); + UPDATE_INTERNAL_COUNTERS( description, 0, pElem, count_desc ); while( 1 ) { while( OPAL_LIKELY(pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA) ) { diff --git a/opal/datatype/opal_datatype_get_count.c b/opal/datatype/opal_datatype_get_count.c index ae085c42704..f75b86d0e2d 100644 --- a/opal/datatype/opal_datatype_get_count.c +++ b/opal/datatype/opal_datatype_get_count.c @@ -69,14 +69,14 @@ ssize_t opal_datatype_get_element_count( const opal_datatype_t* datatype, size_t while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* now here we have a basic datatype */ const opal_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(pElems[pos_desc]); - local_size = pElems[pos_desc].elem.count * basic_type->size; + local_size = (pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen) * basic_type->size; if( local_size >= iSize ) { local_size = iSize / basic_type->size; nbElems += (int32_t)local_size; iSize -= local_size * basic_type->size; return (iSize == 0 ? 
nbElems : -1); } - nbElems += pElems[pos_desc].elem.count; + nbElems += (pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen); iSize -= local_size; pos_desc++; /* advance to the next data */ } @@ -131,7 +131,7 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* now here we have a basic datatype */ const opal_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(pElems[pos_desc]); - local_length = pElems[pos_desc].elem.count; + local_length = (pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen); if( local_length >= count ) { *length += count * basic_type->size; return 0; @@ -188,8 +188,8 @@ int opal_datatype_compute_ptypes( opal_datatype_t* datatype ) } while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* now here we have a basic datatype */ - datatype->ptypes[pElems[pos_desc].elem.common.type] += pElems[pos_desc].elem.count; - nbElems += pElems[pos_desc].elem.count; + datatype->ptypes[pElems[pos_desc].elem.common.type] += pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen; + nbElems += pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen; DUMP( " compute_ptypes-add: type %d count %"PRIsize_t" (total type %"PRIsize_t" total %lld)\n", pElems[pos_desc].elem.common.type, datatype->ptypes[pElems[pos_desc].elem.common.type], diff --git a/opal/datatype/opal_datatype_internal.h b/opal/datatype/opal_datatype_internal.h index bc3f8aa7cab..2b2ddc0961e 100644 --- a/opal/datatype/opal_datatype_internal.h +++ b/opal/datatype/opal_datatype_internal.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2018 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -220,14 +220,14 @@ union dt_elem_desc { * elem.blocklen to create it. If the number is prime then create a second * element to account for the difference. */ -#define CREATE_ELEM( _place, _type, _flags, _count, _disp, _extent ) \ +#define CREATE_ELEM(_place, _type, _flags, _blocklen, _count, _disp, _extent) \ do { \ (_place)->elem.common.flags = (_flags) | OPAL_DATATYPE_FLAG_DATA; \ (_place)->elem.common.type = (_type); \ - (_place)->elem.disp = (_disp); \ - (_place)->elem.extent = (_extent); \ + (_place)->elem.blocklen = (_blocklen); \ (_place)->elem.count = (_count); \ - (_place)->elem.blocklen = 1; \ + (_place)->elem.extent = (_extent); \ + (_place)->elem.disp = (_disp); \ } while(0) /* * This array holds the descriptions desc.desc[2] of the predefined basic datatypes. @@ -480,22 +480,22 @@ static inline int GET_FIRST_NON_LOOP( const union dt_elem_desc* _pElem ) } #define UPDATE_INTERNAL_COUNTERS( DESCRIPTION, POSITION, ELEMENT, COUNTER ) \ - do { \ - (ELEMENT) = &((DESCRIPTION)[(POSITION)]); \ - if( OPAL_DATATYPE_LOOP == (ELEMENT)->elem.common.type ) \ - (COUNTER) = (ELEMENT)->loop.loops; \ - else \ - (COUNTER) = (ELEMENT)->elem.count; \ + do { \ + (ELEMENT) = &((DESCRIPTION)[(POSITION)]); \ + if( OPAL_DATATYPE_LOOP == (ELEMENT)->elem.common.type ) \ + (COUNTER) = (ELEMENT)->loop.loops; \ + else \ + (COUNTER) = (ELEMENT)->elem.count * (ELEMENT)->elem.blocklen; \ } while (0) OPAL_DECLSPEC int opal_datatype_contain_basic_datatypes( const struct opal_datatype_t* pData, char* ptr, size_t length ); OPAL_DECLSPEC int opal_datatype_dump_data_flags( unsigned short usflags, char* ptr, size_t length ); OPAL_DECLSPEC int opal_datatype_dump_data_desc( union dt_elem_desc* pDesc, int nbElems, char* ptr, size_t length ); -#if OPAL_ENABLE_DEBUG extern bool opal_position_debug; extern bool opal_copy_debug; -#endif /* OPAL_ENABLE_DEBUG */ +extern bool opal_unpack_debug; +extern bool opal_pack_debug; 
END_C_DECLS #endif /* OPAL_DATATYPE_INTERNAL_H_HAS_BEEN_INCLUDED */ diff --git a/opal/datatype/opal_datatype_module.c b/opal/datatype/opal_datatype_module.c index 2d8dedc94e7..7976392b63e 100644 --- a/opal/datatype/opal_datatype_module.c +++ b/opal/datatype/opal_datatype_module.c @@ -226,6 +226,7 @@ int32_t opal_datatype_init( void ) datatype->desc.desc[0].elem.common.type = i; /* datatype->desc.desc[0].elem.blocklen XXX not set at the moment, it will be needed later */ datatype->desc.desc[0].elem.count = 1; + datatype->desc.desc[0].elem.blocklen = 1; datatype->desc.desc[0].elem.disp = 0; datatype->desc.desc[0].elem.extent = datatype->size; diff --git a/opal/datatype/opal_datatype_monotonic.c b/opal/datatype/opal_datatype_monotonic.c index b467d95ecbe..247fd66142d 100644 --- a/opal/datatype/opal_datatype_monotonic.c +++ b/opal/datatype/opal_datatype_monotonic.c @@ -2,6 +2,9 @@ /* * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2019 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -18,35 +21,43 @@ #include "opal/datatype/opal_datatype_internal.h" #include "opal/datatype/opal_convertor.h" +#define OPAL_DATATYPE_MAX_MONOTONIC_IOVEC 32 + +/** + * Check if the datatype describes a memory layout where the pointers to + * the contiguous pieces are always advancing in the same direction, i.e. + * there is no potential for overlap. 
+ */ int32_t opal_datatype_is_monotonic(opal_datatype_t* type ) { + struct iovec iov[OPAL_DATATYPE_MAX_MONOTONIC_IOVEC]; + ptrdiff_t upper_limit = (ptrdiff_t)type->true_lb; /* as conversion base will be NULL the first address is true_lb */ + size_t max_data = 0x7FFFFFFF; opal_convertor_t *pConv; + bool monotonic = true; uint32_t iov_count; - struct iovec iov[5]; - size_t max_data = 0; - long prev = -1; int rc; - bool monotonic = true; pConv = opal_convertor_create( opal_local_arch, 0 ); if (OPAL_UNLIKELY(NULL == pConv)) { - return 0; + return -1; } rc = opal_convertor_prepare_for_send( pConv, type, 1, NULL ); if( OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { OBJ_RELEASE(pConv); - return 0; + return -1; } do { - iov_count = 5; + iov_count = OPAL_DATATYPE_MAX_MONOTONIC_IOVEC; rc = opal_convertor_raw( pConv, iov, &iov_count, &max_data); - for (uint32_t i=0; icommon.flags = OPAL_DATATYPE_FLAG_BASIC; \ - _elem->common.type = OPAL_DATATYPE_LOOP; \ - _elem->count = 0; \ - _elem->disp = 0; \ - _elem->extent = 0; \ - } while (0) - static int32_t opal_datatype_optimize_short( opal_datatype_t* pData, size_t count, dt_type_desc_t* pTypeDesc ) { dt_elem_desc_t* pElemDesc; - ddt_elem_desc_t opt_elem; - dt_stack_t* pOrigStack; - dt_stack_t* pStack; /* pointer to the position on the stack */ - int32_t pos_desc = 0; /* actual position in the description of the derived datatype */ - int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1; - int32_t type = OPAL_DATATYPE_LOOP, nbElems = 0, continuity; - ptrdiff_t total_disp = 0, last_extent = 1, last_disp = 0; - uint16_t last_flags = 0xFFFF; /* keep all for the first datatype */ - uint32_t i; - size_t last_length = 0; + dt_stack_t *pOrigStack, *pStack; /* pointer to the position on the stack */ + int32_t pos_desc = 0; /* actual position in the description of the derived datatype */ + int32_t stack_pos = 0; + int32_t nbElems = 0; + ptrdiff_t total_disp = 0; + ddt_elem_desc_t last = {.common.flags = 0xFFFF /* all on */, .count = 0, .disp = 0}, 
compress; + ddt_elem_desc_t* current; pOrigStack = pStack = (dt_stack_t*)malloc( sizeof(dt_stack_t) * (pData->loops+2) ); SAVE_STACK( pStack, -1, 0, count, 0 ); @@ -64,22 +51,17 @@ opal_datatype_optimize_short( opal_datatype_t* pData, pTypeDesc->desc = pElemDesc = (dt_elem_desc_t*)malloc( sizeof(dt_elem_desc_t) * pTypeDesc->length ); pTypeDesc->used = 0; - SET_EMPTY_ELEMENT( &opt_elem ); assert( OPAL_DATATYPE_END_LOOP == pData->desc.desc[pData->desc.used].elem.common.type ); - opt_elem.common.type = OPAL_DATATYPE_LOOP; - opt_elem.common.flags = 0xFFFF; /* keep all for the first datatype */ - opt_elem.count = 0; - opt_elem.disp = pData->desc.desc[pData->desc.used].end_loop.first_elem_disp; - opt_elem.extent = 0; while( stack_pos >= 0 ) { if( OPAL_DATATYPE_END_LOOP == pData->desc.desc[pos_desc].elem.common.type ) { /* end of the current loop */ ddt_endloop_desc_t* end_loop = &(pData->desc.desc[pos_desc].end_loop); - if( last_length != 0 ) { - CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent ); + if( 0 != last.count ) { + CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC, + last.blocklen, last.count, last.disp, last.extent ); pElemDesc++; nbElems++; - last_disp += last_length; - last_length = 0; + last.disp += last.count; + last.count= 0; } CREATE_LOOP_END( pElemDesc, nbElems - pStack->index + 1, /* # of elems in this loop */ end_loop->first_elem_disp, end_loop->size, end_loop->common.flags ); @@ -97,153 +79,146 @@ opal_datatype_optimize_short( opal_datatype_t* pData, ddt_loop_desc_t* loop = (ddt_loop_desc_t*)&(pData->desc.desc[pos_desc]); ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items]); int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) ); - ptrdiff_t loop_disp = pData->desc.desc[pos_desc + index].elem.disp; - continuity = ((last_disp + (ptrdiff_t)last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size) - == (total_disp + 
loop_disp)); if( loop->common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { - /* the loop is contiguous or composed by contiguous elements with a gap */ - if( loop->extent == (ptrdiff_t)end_loop->size ) { - /* the whole loop is contiguous */ - if( !continuity ) { - if( 0 != last_length ) { - CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, - last_length, last_disp, last_extent ); - pElemDesc++; nbElems++; - last_length = 0; - } - last_disp = total_disp + loop_disp; - } - last_length = (last_length * opal_datatype_basicDatatypes[last_type]->size - + loop->loops * end_loop->size); - last_type = OPAL_DATATYPE_UINT1; - last_extent = 1; - } else { - int counter = loop->loops; - ptrdiff_t merged_disp = 0; - /* if the previous data is contiguous with this piece and it has a length not ZERO */ - if( last_length != 0 ) { - if( continuity ) { - last_length *= opal_datatype_basicDatatypes[last_type]->size; - last_length += end_loop->size; - last_type = OPAL_DATATYPE_UINT1; - last_extent = 1; - counter--; - merged_disp = loop->extent; /* merged loop, update the disp of the remaining elems */ - } - CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, - last_length, last_disp, last_extent ); - pElemDesc++; nbElems++; - last_disp += last_length; - last_length = 0; - last_type = OPAL_DATATYPE_LOOP; - } - /** - * The content of the loop is contiguous (maybe with a gap before or after). - * - * If any of the loops have been merged with the previous element, then the - * displacement of the first element (or the displacement of all elements if the - * loop will be removed) must be updated accordingly. 
- */ - if( counter <= 2 ) { - merged_disp += end_loop->first_elem_disp; - while( counter > 0 ) { - CREATE_ELEM( pElemDesc, OPAL_DATATYPE_UINT1, OPAL_DATATYPE_FLAG_BASIC, - end_loop->size, merged_disp, 1); - pElemDesc++; nbElems++; counter--; - merged_disp += loop->extent; - } - } else { - CREATE_LOOP_START( pElemDesc, counter, 2, loop->extent, loop->common.flags ); - pElemDesc++; nbElems++; - CREATE_ELEM( pElemDesc, OPAL_DATATYPE_UINT1, OPAL_DATATYPE_FLAG_BASIC, - end_loop->size, loop_disp, 1); - pElemDesc++; nbElems++; - CREATE_LOOP_END( pElemDesc, 2, end_loop->first_elem_disp + merged_disp, - end_loop->size, end_loop->common.flags ); - pElemDesc++; nbElems++; + assert(pData->desc.desc[pos_desc + index].elem.disp == end_loop->first_elem_disp); + compress.common.flags = loop->common.flags; + compress.common.type = pData->desc.desc[pos_desc + index].elem.common.type; + compress.blocklen = pData->desc.desc[pos_desc + index].elem.blocklen; + for( uint32_t i = index+1; i < loop->items; i++ ) { + current = &pData->desc.desc[pos_desc + i].elem; + assert(1 == current->count); + if( (current->common.type == OPAL_DATATYPE_LOOP) || + compress.common.type != current->common.type ) { + compress.common.type = OPAL_DATATYPE_UINT1; + compress.blocklen = end_loop->size; + break; } + compress.blocklen += current->blocklen; } + compress.count = loop->loops; + compress.extent = loop->extent; + compress.disp = end_loop->first_elem_disp; + + /** + * The current loop has been compressed and can now be treated as if it + * was a data element. We can now look if it can be fused with last, + * as done in the fusion of 2 elements below. Let's use the same code. 
+ */ pos_desc += loop->items + 1; - } else { - ddt_elem_desc_t* elem = (ddt_elem_desc_t*)&(pData->desc.desc[pos_desc+1]); - if( last_length != 0 ) { - CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent ); - pElemDesc++; nbElems++; - last_disp += last_length; - last_length = 0; - last_type = OPAL_DATATYPE_LOOP; - } - if( 2 == loop->items ) { /* small loop */ - if( (1 == elem->count) - && (elem->extent == (ptrdiff_t)opal_datatype_basicDatatypes[elem->common.type]->size) ) { - CREATE_ELEM( pElemDesc, elem->common.type, elem->common.flags & ~OPAL_DATATYPE_FLAG_CONTIGUOUS, - loop->loops, elem->disp, loop->extent ); + current = &compress; + goto fuse_loops; + } + + /** + * If the content of the loop is not contiguous there is little we can do + * that would not incur significant optimization cost and still be beneficial + * in reducing the number of memcpy during pack/unpack. + */ + + if( 0 != last.count ) { /* Generate the pending element */ + CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC, + last.blocklen, last.count, last.disp, last.extent ); + pElemDesc++; nbElems++; + last.count = 0; + last.common.type = OPAL_DATATYPE_LOOP; + } + + /* Can we unroll the loop? 
*/ + if( (loop->items <= 3) && (loop->loops <= 2) ) { + ptrdiff_t elem_displ = 0; + for( uint32_t i = 0; i < loop->loops; i++ ) { + for( uint32_t j = 0; j < (loop->items - 1); j++ ) { + current = &pData->desc.desc[pos_desc + index + j].elem; + CREATE_ELEM( pElemDesc, current->common.type, current->common.flags, + current->blocklen, current->count, current->disp + elem_displ, current->extent ); pElemDesc++; nbElems++; - pos_desc += loop->items + 1; - goto complete_loop; - } else if( loop->loops < 3 ) { - ptrdiff_t elem_displ = elem->disp; - for( i = 0; i < loop->loops; i++ ) { - CREATE_ELEM( pElemDesc, elem->common.type, elem->common.flags, - elem->count, elem_displ, elem->extent ); - elem_displ += loop->extent; - pElemDesc++; nbElems++; - } - pos_desc += loop->items + 1; - goto complete_loop; } + elem_displ += loop->extent; } - CREATE_LOOP_START( pElemDesc, loop->loops, loop->items, loop->extent, loop->common.flags ); - pElemDesc++; nbElems++; - PUSH_STACK( pStack, stack_pos, nbElems, OPAL_DATATYPE_LOOP, loop->loops, total_disp ); - pos_desc++; - DDT_DUMP_STACK( pStack, stack_pos, pData->desc.desc, "advance loops" ); + pos_desc += loop->items + 1; + goto complete_loop; } + + CREATE_LOOP_START( pElemDesc, loop->loops, loop->items, loop->extent, loop->common.flags ); + pElemDesc++; nbElems++; + PUSH_STACK( pStack, stack_pos, nbElems, OPAL_DATATYPE_LOOP, loop->loops, total_disp ); + pos_desc++; + DDT_DUMP_STACK( pStack, stack_pos, pData->desc.desc, "advance loops" ); + complete_loop: total_disp = pStack->disp; /* update the displacement */ continue; } - while( pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* keep doing it until we reach a non datatype element */ - /* now here we have a basic datatype */ - type = pData->desc.desc[pos_desc].elem.common.type; - continuity = ((last_disp + (ptrdiff_t)last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size) - == (total_disp + pData->desc.desc[pos_desc].elem.disp)); + while( 
pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* go over all basic datatype elements */ + current = &pData->desc.desc[pos_desc].elem; + pos_desc++; /* point to the next element as current points to the current one */ - if( (pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && continuity && - (pData->desc.desc[pos_desc].elem.extent == (int32_t)opal_datatype_basicDatatypes[type]->size) ) { - if( type == last_type ) { - last_length += pData->desc.desc[pos_desc].elem.count; - last_extent = pData->desc.desc[pos_desc].elem.extent; + fuse_loops: + if( 0 == last.count ) { /* first data of the datatype */ + last = *current; + continue; /* next data */ + } + + /* are the two elements compatible: aka they have very similar values and they + * can be merged together by increasing the count. This optimizes the memory + * required for storing the datatype description. + */ + if( ((last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) == + (current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size)) && + (current->disp == (last.disp + (ptrdiff_t)last.count * last.extent)) && + ((last.count == 1) || (current->count == 1) || (last.extent == current->extent)) ) { + last.count += current->count; + if( last.count == 1 ) { + last.extent = current->extent; + } /* otherwise keep the last.extent */ + /* find the lowest common denominator type */ + if( last.common.type != current->common.type ) { + last.common.type = OPAL_DATATYPE_UINT1; + last.blocklen *= opal_datatype_basicDatatypes[last.common.type]->size; + } + continue; /* next data */ + } + /* are the elements fusionable such that we can fusion the last blocklen of one with the first + * blocklen of the other.
+ */ + if( (ptrdiff_t)(last.disp + (last.count - 1) * last.extent + last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) == + current->disp ) { + if( last.count != 1 ) { + CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC, + last.blocklen, last.count - 1, last.disp, last.extent ); + pElemDesc++; nbElems++; + last.disp += (last.count - 1) * last.extent; + last.count = 1; + } + if( last.common.type == current->common.type ) { + last.blocklen += current->blocklen; } else { - if( last_length == 0 ) { - last_type = type; - last_length = pData->desc.desc[pos_desc].elem.count; - last_extent = pData->desc.desc[pos_desc].elem.extent; - } else { - last_length = last_length * opal_datatype_basicDatatypes[last_type]->size + - pData->desc.desc[pos_desc].elem.count * opal_datatype_basicDatatypes[type]->size; - last_type = OPAL_DATATYPE_UINT1; - last_extent = 1; - } + last.blocklen = ((last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) + + (current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size)); + last.common.type = OPAL_DATATYPE_UINT1; } - last_flags &= pData->desc.desc[pos_desc].elem.common.flags; - } else { - if( last_length != 0 ) { - CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent ); + last.extent += current->extent; + if( current->count != 1 ) { + CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC, + last.blocklen, last.count, last.disp, last.extent ); pElemDesc++; nbElems++; + last = *current; + last.count -= 1; + last.disp += last.extent; } - last_disp = total_disp + pData->desc.desc[pos_desc].elem.disp; - last_length = pData->desc.desc[pos_desc].elem.count; - last_extent = pData->desc.desc[pos_desc].elem.extent; - last_type = type; + continue; } - pos_desc++; /* advance to the next data */ + CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC, + last.blocklen, last.count, last.disp, last.extent ); + pElemDesc++; 
nbElems++; + last = *current; } } - if( last_length != 0 ) { - CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent ); + if( 0 != last.count ) { + CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC, + last.blocklen, last.count, last.disp, last.extent ); pElemDesc++; nbElems++; } /* cleanup the stack */ diff --git a/opal/datatype/opal_datatype_pack.h b/opal/datatype/opal_datatype_pack.h index f952cabc3c0..66259f8b66b 100644 --- a/opal/datatype/opal_datatype_pack.h +++ b/opal/datatype/opal_datatype_pack.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. @@ -19,8 +19,6 @@ #include "opal_config.h" -#include - #if !defined(CHECKSUM) && OPAL_CUDA_SUPPORT /* Make use of existing macro to do CUDA style memcpy */ #undef MEMCPY_CSUM @@ -28,75 +26,117 @@ CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) ) #endif -static inline void pack_predefined_data( opal_convertor_t* CONVERTOR, - const dt_elem_desc_t* ELEM, - size_t* COUNT, - unsigned char** SOURCE, - unsigned char** DESTINATION, - size_t* SPACE ) +static inline void +pack_predefined_data( opal_convertor_t* CONVERTOR, + const dt_elem_desc_t* ELEM, + size_t* COUNT, + unsigned char** memory, + unsigned char** packed, + size_t* SPACE ) { - size_t _copy_count = *(COUNT); - size_t _copy_blength; const ddt_elem_desc_t* _elem = &((ELEM)->elem); - unsigned char* _source = (*SOURCE) + _elem->disp; + size_t total_count = _elem->count * _elem->blocklen; + size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size; + size_t do_now, do_now_bytes; + unsigned char* _memory = (*memory) + _elem->disp; + + assert( *(COUNT) <= _elem->count * _elem->blocklen); + + 
if( cando_count > *(COUNT) ) + cando_count = *(COUNT); + + /** + * First check if we already did something on this element ? + */ + do_now = (total_count - *(COUNT)); /* done elements */ + if( 0 != do_now ) { + do_now = do_now % _elem->blocklen; /* partial blocklen? */ + + if( 0 != do_now ) { + size_t left_in_block = _elem->blocklen - do_now; /* left in the current blocklen */ + do_now = (left_in_block > cando_count ) ? cando_count : left_in_block; + do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; - _copy_blength = opal_datatype_basicDatatypes[_elem->common.type]->size; - if( (_copy_count * _copy_blength) > *(SPACE) ) { - _copy_count = (*(SPACE) / _copy_blength); - if( 0 == _copy_count ) return; /* nothing to do */ + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "pack 1. memcpy( %p, %p, %lu ) => space %lu [prolog]\n", + (void*)*(packed), (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); + MEMCPY_CSUM( *(packed), _memory, do_now_bytes, (CONVERTOR) ); + _memory = (*memory) + _elem->disp + (ptrdiff_t)do_now_bytes; + /* compensate if we just completed a blocklen */ + if( do_now == left_in_block ) + _memory += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size); + *(packed) += do_now_bytes; + *(SPACE) -= do_now_bytes; + *(COUNT) -= do_now; + cando_count -= do_now; + } } - if( (ptrdiff_t)_copy_blength == _elem->extent ) { - _copy_blength *= _copy_count; - /* the extent and the size of the basic datatype are equal */ - OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _copy_blength, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "pack 1. 
memcpy( %p, %p, %lu ) => space %lu\n", - (void*)*(DESTINATION), (void*)_source, (unsigned long)_copy_blength, (unsigned long)(*(SPACE)) ); ); - MEMCPY_CSUM( *(DESTINATION), _source, _copy_blength, (CONVERTOR) ); - _source += _copy_blength; - *(DESTINATION) += _copy_blength; - } else { - for(size_t _i = 0; _i < _copy_count; _i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _copy_blength, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); + /** + * Compute how many full blocklen we need to do and do them. + */ + do_now = cando_count / _elem->blocklen; + if( 0 != do_now ) { + do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size; + for(size_t _i = 0; _i < do_now; _i++ ) { + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n", - (void*)*(DESTINATION), (void*)_source, (unsigned long)_copy_blength, (unsigned long)(*(SPACE) - (_i * _copy_blength)) ); ); - MEMCPY_CSUM( *(DESTINATION), _source, _copy_blength, (CONVERTOR) ); - *(DESTINATION) += _copy_blength; - _source += _elem->extent; + (void*)*(packed), (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)*(SPACE) ); ); + MEMCPY_CSUM( *(packed), _memory, do_now_bytes, (CONVERTOR) ); + *(packed) += do_now_bytes; + _memory += _elem->extent; + *(SPACE) -= do_now_bytes; + *(COUNT) -= _elem->blocklen; + cando_count -= _elem->blocklen; } - _copy_blength *= _copy_count; } - *(SOURCE) = _source - _elem->disp; - *(SPACE) -= _copy_blength; - *(COUNT) -= _copy_count; + + /** + * As an epilog do anything left from the last blocklen. + */ + do_now = cando_count; + if( 0 != do_now ) { + do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "pack 3. 
memcpy( %p, %p, %lu ) => space %lu [epilog]\n", + (void*)*(packed), (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); + MEMCPY_CSUM( *(packed), _memory, do_now_bytes, (CONVERTOR) ); + _memory += do_now_bytes; + *(packed) += do_now_bytes; + *(SPACE) -= do_now_bytes; + *(COUNT) -= do_now; + } + + *(memory) = _memory - _elem->disp; } static inline void pack_contiguous_loop( opal_convertor_t* CONVERTOR, const dt_elem_desc_t* ELEM, size_t* COUNT, - unsigned char** SOURCE, - unsigned char** DESTINATION, + unsigned char** memory, + unsigned char** packed, size_t* SPACE ) { const ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM); const ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + _loop->items); - unsigned char* _source = (*SOURCE) + _end_loop->first_elem_disp; + unsigned char* _memory = (*memory) + _end_loop->first_elem_disp; size_t _copy_loops = *(COUNT); if( (_copy_loops * _end_loop->size) > *(SPACE) ) _copy_loops = (*(SPACE) / _end_loop->size); for(size_t _i = 0; _i < _copy_loops; _i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _end_loop->size, (CONVERTOR)->pBaseBuf, + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, _end_loop->size, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); DO_DEBUG( opal_output( 0, "pack 3. 
memcpy( %p, %p, %lu ) => space %lu\n", - (void*)*(DESTINATION), (void*)_source, (unsigned long)_end_loop->size, (unsigned long)(*(SPACE) - _i * _end_loop->size) ); ); - MEMCPY_CSUM( *(DESTINATION), _source, _end_loop->size, (CONVERTOR) ); - *(DESTINATION) += _end_loop->size; - _source += _loop->extent; + (void*)*(packed), (void*)_memory, (unsigned long)_end_loop->size, (unsigned long)(*(SPACE) - _i * _end_loop->size) ); ); + MEMCPY_CSUM( *(packed), _memory, _end_loop->size, (CONVERTOR) ); + *(packed) += _end_loop->size; + _memory += _loop->extent; } - *(SOURCE) = _source - _end_loop->first_elem_disp; + *(memory) = _memory - _end_loop->first_elem_disp; *(SPACE) -= _copy_loops * _end_loop->size; *(COUNT) -= _copy_loops; } @@ -104,12 +144,12 @@ static inline void pack_contiguous_loop( opal_convertor_t* CONVERTOR, #define PACK_PREDEFINED_DATATYPE( CONVERTOR, /* the convertor */ \ ELEM, /* the basic element to be packed */ \ COUNT, /* the number of elements */ \ - SOURCE, /* the source pointer (char*) */ \ - DESTINATION, /* the destination pointer (char*) */ \ + MEMORY, /* the source pointer (char*) */ \ + PACKED, /* the destination pointer (char*) */ \ SPACE ) /* the space in the destination buffer */ \ -pack_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) ) +pack_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(MEMORY), &(PACKED), &(SPACE) ) -#define PACK_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, SOURCE, DESTINATION, SPACE ) \ - pack_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) ) +#define PACK_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, MEMORY, PACKED, SPACE ) \ + pack_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(MEMORY), &(PACKED), &(SPACE) ) #endif /* OPAL_DATATYPE_PACK_H_HAS_BEEN_INCLUDED */ diff --git a/opal/datatype/opal_datatype_position.c b/opal/datatype/opal_datatype_position.c index 3b8eaec69c6..381a31086d6 100644 --- a/opal/datatype/opal_datatype_position.c +++ 
b/opal/datatype/opal_datatype_position.c @@ -61,22 +61,77 @@ position_predefined_data( opal_convertor_t* CONVERTOR, unsigned char** POINTER, size_t* SPACE ) { - size_t _copy_count = *(COUNT); - size_t _copy_blength; - ddt_elem_desc_t* _elem = &((ELEM)->elem); + const ddt_elem_desc_t* _elem = &((ELEM)->elem); + size_t total_count = _elem->count * _elem->blocklen; + size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size; + size_t do_now, do_now_bytes; + unsigned char* _memory = (*POINTER) + _elem->disp; - _copy_blength = opal_datatype_basicDatatypes[_elem->common.type]->size; - if( (_copy_count * _copy_blength) > *(SPACE) ) { - _copy_count = *(SPACE) / _copy_blength; - if( 0 == _copy_count ) return; /* nothing to do */ + assert( *(COUNT) <= _elem->count * _elem->blocklen); + + if( cando_count > *(COUNT) ) + cando_count = *(COUNT); + + /** + * First check if we already did something on this element ? + */ + do_now = (total_count - *(COUNT)); /* done elements */ + if( 0 != do_now ) { + do_now = do_now % _elem->blocklen; /* partial blocklen? */ + + if( 0 != do_now ) { + size_t left_in_block = _elem->blocklen - do_now; /* left in the current blocklen */ + do_now = (left_in_block > cando_count ) ? 
cando_count : left_in_block; + do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; + + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "position( %p, %lu ) => space %lu [prolog]\n", + (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); + _memory = *(POINTER) + _elem->disp + (ptrdiff_t)do_now_bytes; + /* compensate if we just completed a blocklen */ + if( do_now == left_in_block ) + _memory += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size); + *(SPACE) -= do_now_bytes; + *(COUNT) -= do_now; + cando_count -= do_now; + } + } + + /** + * Compute how many full blocklen we need to do and do them. + */ + do_now = cando_count / _elem->blocklen; + if( 0 != do_now ) { + do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size; + for(size_t _i = 0; _i < do_now; _i++ ) { + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "position( %p, %lu ) => space %lu\n", + (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)*(SPACE) ); ); + _memory += _elem->extent; + *(SPACE) -= do_now_bytes; + *(COUNT) -= _elem->blocklen; + cando_count -= _elem->blocklen; + } + } + + /** + * As an epilog do anything left from the last blocklen. 
+ */ + do_now = cando_count; + if( 0 != do_now ) { + do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "position( %p, %lu ) => space %lu [epilog]\n", + (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); + _memory += do_now_bytes; + *(SPACE) -= do_now_bytes; + *(COUNT) -= do_now; } - _copy_blength *= _copy_count; - OPAL_DATATYPE_SAFEGUARD_POINTER( *(POINTER) + _elem->disp, _copy_blength, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - *(POINTER) += (_copy_count * _elem->extent); - *(SPACE) -= _copy_blength; - *(COUNT) -= _copy_count; + *(POINTER) = _memory - _elem->disp; } /** @@ -128,8 +183,8 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, /* We dont want to have to parse the datatype multiple times. What we are interested in * here is to compute the number of completed datatypes that we can move forward, update - * the counters and finally compute the position taking in account only the remaining - * elements. The only problem is that we have to modify all the elements on the stack. + * the counters and compute the position taking in account only the remaining elements. + * The only problem is that we have to modify all the elements on the stack. */ iov_len_local = *position - pConvertor->bConverted; if( iov_len_local > pConvertor->pDesc->size ) { diff --git a/opal/datatype/opal_datatype_unpack.h b/opal/datatype/opal_datatype_unpack.h index d837aad5ab7..f51a609294d 100644 --- a/opal/datatype/opal_datatype_unpack.h +++ b/opal/datatype/opal_datatype_unpack.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. @@ -27,83 +27,124 @@ #endif static inline void -unpack_predefined_data( opal_convertor_t* CONVERTOR, /* the convertor */ - const dt_elem_desc_t* ELEM, /* the element description */ - size_t* COUNT, /* the number of elements */ - unsigned char** SOURCE, /* the source pointer */ - unsigned char** DESTINATION, /* the destination pointer */ - size_t* SPACE ) /* the space in the destination buffer */ +unpack_predefined_data( opal_convertor_t* CONVERTOR, + const dt_elem_desc_t* ELEM, + size_t* COUNT, + unsigned char** packed, + unsigned char** memory, + size_t* SPACE ) { - size_t _copy_count = *(COUNT); - size_t _copy_blength; const ddt_elem_desc_t* _elem = &((ELEM)->elem); - unsigned char* _destination = (*DESTINATION) + _elem->disp; + size_t total_count = _elem->count * _elem->blocklen; + size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size; + size_t do_now, do_now_bytes; + unsigned char* _memory = (*memory) + _elem->disp; - _copy_blength = opal_datatype_basicDatatypes[_elem->common.type]->size; - if( (_copy_count * _copy_blength) > *(SPACE) ) { - _copy_count = (*(SPACE) / _copy_blength); - if( 0 == _copy_count ) return; /* nothing to do */ + assert( *(COUNT) <= _elem->count * _elem->blocklen); + + if( cando_count > *(COUNT) ) + cando_count = *(COUNT); + + /** + * First check if we already did something on this element ? + */ + do_now = (total_count - *(COUNT)); /* done elements */ + if( 0 != do_now ) { + do_now = do_now % _elem->blocklen; /* partial blocklen? */ + + if( 0 != do_now ) { + size_t left_in_block = _elem->blocklen - do_now; /* left in the current blocklen */ + do_now = (left_in_block > cando_count ) ? 
cando_count : left_in_block; + do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; + + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "unpack 1. memcpy( %p, %p, %lu ) => space %lu [prolog]\n", + (void*)_memory, (void*)*(packed), (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); + MEMCPY_CSUM( _memory, *(packed), do_now_bytes, (CONVERTOR) ); + _memory = (*memory) + _elem->disp + (ptrdiff_t)do_now_bytes; + /* compensate if we just completed a blocklen */ + if( do_now == left_in_block ) + _memory += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size); + *(packed) += do_now_bytes; + *(SPACE) -= do_now_bytes; + *(COUNT) -= do_now; + cando_count -= do_now; + } } - if( (ptrdiff_t)_copy_blength == _elem->extent ) { - _copy_blength *= _copy_count; - /* the extent and the size of the basic datatype are equal */ - OPAL_DATATYPE_SAFEGUARD_POINTER( _destination, _copy_blength, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "unpack 1. memcpy( %p, %p, %lu ) => space %lu\n", - (void*)_destination, (void*)*(SOURCE), (unsigned long)_copy_blength, (unsigned long)(*(SPACE)) ); ); - MEMCPY_CSUM( _destination, *(SOURCE), _copy_blength, (CONVERTOR) ); - *(SOURCE) += _copy_blength; - _destination += _copy_blength; - } else { - for(size_t _i = 0; _i < _copy_count; _i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( _destination, _copy_blength, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "unpack 2. 
memcpy( %p, %p, %lu ) => space %lu\n", - (void*)_destination, (void*)*(SOURCE), (unsigned long)_copy_blength, (unsigned long)(*(SPACE) - (_i * _copy_blength)) ); ); - MEMCPY_CSUM( _destination, *(SOURCE), _copy_blength, (CONVERTOR) ); - *(SOURCE) += _copy_blength; - _destination += _elem->extent; + /** + * Compute how many full blocklen we need to do and do them. + */ + do_now = cando_count / _elem->blocklen; + if( 0 != do_now ) { + do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size; + for(size_t _i = 0; _i < do_now; _i++ ) { + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n", + (void*)_memory, (void*)*(packed), (unsigned long)do_now_bytes, (unsigned long)*(SPACE) ); ); + MEMCPY_CSUM( _memory, *(packed), do_now_bytes, (CONVERTOR) ); + *(packed) += do_now_bytes; + _memory += _elem->extent; + *(SPACE) -= do_now_bytes; + *(COUNT) -= _elem->blocklen; + cando_count -= _elem->blocklen; } - _copy_blength *= _copy_count; } - (*DESTINATION) = _destination - _elem->disp; - *(SPACE) -= _copy_blength; - *(COUNT) -= _copy_count; + + /** + * As an epilog do anything left from the last blocklen. + */ + do_now = cando_count; + if( 0 != do_now ) { + do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "pack 3. 
memcpy( %p, %p, %lu ) => space %lu [epilog]\n", + (void*)_memory, (void*)*(packed), (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); + MEMCPY_CSUM( _memory, *(packed), do_now_bytes, (CONVERTOR) ); + _memory += do_now_bytes; + *(packed) += do_now_bytes; + *(SPACE) -= do_now_bytes; + *(COUNT) -= do_now; + } + + *(memory) = _memory - _elem->disp; } static inline void unpack_contiguous_loop( opal_convertor_t* CONVERTOR, const dt_elem_desc_t* ELEM, size_t* COUNT, - unsigned char** SOURCE, - unsigned char** DESTINATION, + unsigned char** packed, + unsigned char** memory, size_t* SPACE ) { const ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM); const ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + _loop->items); - unsigned char* _destination = (*DESTINATION) + _end_loop->first_elem_disp; + unsigned char* _memory = (*memory) + _end_loop->first_elem_disp; size_t _copy_loops = *(COUNT); if( (_copy_loops * _end_loop->size) > *(SPACE) ) _copy_loops = (*(SPACE) / _end_loop->size); for(size_t _i = 0; _i < _copy_loops; _i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( _destination, _end_loop->size, (CONVERTOR)->pBaseBuf, + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, _end_loop->size, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); DO_DEBUG( opal_output( 0, "unpack 3. 
memcpy( %p, %p, %lu ) => space %lu\n", - (void*)_destination, (void*)*(SOURCE), (unsigned long)_end_loop->size, (unsigned long)(*(SPACE) - _i * _end_loop->size) ); ); - MEMCPY_CSUM( _destination, *(SOURCE), _end_loop->size, (CONVERTOR) ); - *(SOURCE) += _end_loop->size; - _destination += _loop->extent; + (void*)_memory, (void*)*(packed), (unsigned long)_end_loop->size, (unsigned long)(*(SPACE) - _i * _end_loop->size) ); ); + MEMCPY_CSUM( _memory, *(packed), _end_loop->size, (CONVERTOR) ); + *(packed) += _end_loop->size; + _memory += _loop->extent; } - *(DESTINATION) = _destination - _end_loop->first_elem_disp; - *(SPACE) -= _copy_loops * _end_loop->size; - *(COUNT) -= _copy_loops; + *(memory) = _memory - _end_loop->first_elem_disp; + *(SPACE) -= _copy_loops * _end_loop->size; + *(COUNT) -= _copy_loops; } -#define UNPACK_PREDEFINED_DATATYPE( CONVERTOR, ELEM, COUNT, SOURCE, DESTINATION, SPACE ) \ - unpack_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) ) +#define UNPACK_PREDEFINED_DATATYPE( CONVERTOR, ELEM, COUNT, PACKED, MEMORY, SPACE ) \ + unpack_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(PACKED), &(MEMORY), &(SPACE) ) -#define UNPACK_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, SOURCE, DESTINATION, SPACE ) \ - unpack_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) ) +#define UNPACK_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, PACKED, MEMORY, SPACE ) \ + unpack_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(PACKED), &(MEMORY), &(SPACE) ) #endif /* OPAL_DATATYPE_UNPACK_H_HAS_BEEN_INCLUDED */ From 8b794235b8d9882154f45734f1290a4eeedfe4c6 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 8 May 2019 13:08:48 -0400 Subject: [PATCH 49/94] Update the datatype dump to match the actual types. Update the comments to better reflect what is going on. Minor indentations. 
Signed-off-by: George Bosilca --- ompi/datatype/ompi_datatype_module.c | 18 ++++---- opal/datatype/opal_convertor.c | 6 +-- opal/datatype/opal_convertor_raw.c | 30 ++++++------- opal/datatype/opal_datatype_dump.c | 36 +++++++-------- test/datatype/ddt_raw.c | 66 +++++++++++++++------------- 5 files changed, 80 insertions(+), 76 deletions(-) diff --git a/ompi/datatype/ompi_datatype_module.c b/ompi/datatype/ompi_datatype_module.c index 3ee09173cd8..33e8d9b9e92 100644 --- a/ompi/datatype/ompi_datatype_module.c +++ b/ompi/datatype/ompi_datatype_module.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2017 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -736,14 +736,14 @@ void ompi_datatype_dump( const ompi_datatype_t* pData ) length = length * 100 + 500; buffer = (char*)malloc( length ); index += snprintf( buffer, length - index, - "Datatype %p[%s] id %d size %ld align %d opal_id %d length %d used %d\n" - "true_lb %ld true_ub %ld (true_extent %ld) lb %ld ub %ld (extent %ld)\n" - "nbElems %d loops %d flags %X (", - (void*)pData, pData->name, pData->id, - (long)pData->super.size, (int)pData->super.align, pData->super.id, (int)pData->super.desc.length, (int)pData->super.desc.used, - (long)pData->super.true_lb, (long)pData->super.true_ub, (long)(pData->super.true_ub - pData->super.true_lb), - (long)pData->super.lb, (long)pData->super.ub, (long)(pData->super.ub - pData->super.lb), - (int)pData->super.nbElems, (int)pData->super.loops, (int)pData->super.flags ); + "Datatype %p[%s] id %d size %" PRIsize_t " align %u opal_id %u length %" PRIsize_t " used %" PRIsize_t "\n" + "true_lb %td true_ub %td (true_extent %td) lb %td ub %td (extent 
%td)\n" + "nbElems %" PRIsize_t " loops %u flags %X (", + (void*)pData, pData->name, pData->id, + pData->super.size, pData->super.align, (uint32_t)pData->super.id, pData->super.desc.length, pData->super.desc.used, + pData->super.true_lb, pData->super.true_ub, pData->super.true_ub - pData->super.true_lb, + pData->super.lb, pData->super.ub, pData->super.ub - pData->super.lb, + pData->super.nbElems, pData->super.loops, (int)pData->super.flags ); /* dump the flags */ if( ompi_datatype_is_predefined(pData) ) { index += snprintf( buffer + index, length - index, "predefined " ); diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index 631d3adab43..331cb95a715 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -700,12 +700,12 @@ int opal_convertor_clone( const opal_convertor_t* source, void opal_convertor_dump( opal_convertor_t* convertor ) { - opal_output( 0, "Convertor %p count %" PRIsize_t" stack position %d bConverted %" PRIsize_t "\n" - "\tlocal_size %ld remote_size %ld flags %X stack_size %d pending_length %" PRIsize_t "\n" + opal_output( 0, "Convertor %p count %" PRIsize_t " stack position %u bConverted %" PRIsize_t "\n" + "\tlocal_size %" PRIsize_t " remote_size %" PRIsize_t " flags %X stack_size %u pending_length %" PRIsize_t "\n" "\tremote_arch %u local_arch %u\n", (void*)convertor, convertor->count, convertor->stack_pos, convertor->bConverted, - (unsigned long)convertor->local_size, (unsigned long)convertor->remote_size, + convertor->local_size, convertor->remote_size, convertor->flags, convertor->stack_size, convertor->partial_length, convertor->remoteArch, opal_local_arch ); if( convertor->flags & CONVERTOR_RECV ) opal_output( 0, "unpack "); diff --git a/opal/datatype/opal_convertor_raw.c b/opal/datatype/opal_convertor_raw.c index 28022809679..3c2073155b2 100644 --- a/opal/datatype/opal_convertor_raw.c +++ b/opal/datatype/opal_convertor_raw.c @@ -32,13 +32,13 @@ /** * This function always work in local 
representation. This means no representation - * conversion (i.e. no heterogeneity) has to be taken into account, and that all + * conversion (i.e. no heterogeneity) is taken into account, and that all * length we're working on are local. */ int32_t opal_convertor_raw( opal_convertor_t* pConvertor, - struct iovec* iov, uint32_t* iov_count, - size_t* length ) + struct iovec* iov, uint32_t* iov_count, + size_t* length ) { const opal_datatype_t *pData = pConvertor->pDesc; dt_stack_t* pStack; /* pointer to the position on the stack */ @@ -77,9 +77,9 @@ opal_convertor_raw( opal_convertor_t* pConvertor, description = pConvertor->use_desc->desc; /* For the first step we have to add both displacement to the source. After in the - * main while loop we will set back the source_base to the correct value. This is - * due to the fact that the convertor can stop in the middle of a data with a count - */ + * main while loop we will set back the source_base to the correct value. This is + * due to the fact that the convertor can stop in the middle of a data with a count + */ pStack = pConvertor->pStack + pConvertor->stack_pos; pos_desc = pStack->index; source_base = pConvertor->pBaseBuf + pStack->disp; @@ -101,9 +101,9 @@ opal_convertor_raw( opal_convertor_t* pConvertor, blength *= count_desc; /* now here we have a basic datatype */ OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, blength, pConvertor->pBaseBuf, - pConvertor->pDesc, pConvertor->count ); + pConvertor->pDesc, pConvertor->count ); DO_DEBUG( opal_output( 0, "raw 1. 
iov[%d] = {base %p, length %" PRIsize_t "}\n", - index, (void*)source_base, (unsigned long)blength ); ); + index, (void*)source_base, blength ); ); iov[index].iov_base = (IOVBASE_TYPE *) source_base; iov[index].iov_len = blength; source_base += blength; @@ -114,9 +114,9 @@ opal_convertor_raw( opal_convertor_t* pConvertor, } else { for(size_t i = count_desc; (i > 0) && (index < *iov_count); i--, index++ ) { OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, blength, pConvertor->pBaseBuf, - pConvertor->pDesc, pConvertor->count ); + pConvertor->pDesc, pConvertor->count ); DO_DEBUG( opal_output( 0, "raw 2. iov[%d] = {base %p, length %" PRIsize_t "}\n", - index, (void*)source_base, (unsigned long)blength ); ); + index, (void*)source_base, blength ); ); iov[index].iov_base = (IOVBASE_TYPE *) source_base; iov[index].iov_len = blength; source_base += pElem->elem.extent; @@ -141,8 +141,8 @@ opal_convertor_raw( opal_convertor_t* pConvertor, if( --(pStack->count) == 0 ) { /* end of loop */ if( pConvertor->stack_pos == 0 ) { /* we lie about the size of the next element in order to - * make sure we exit the main loop. - */ + * make sure we exit the main loop. 
+ */ *iov_count = index; goto complete_loop; /* completed */ } @@ -174,7 +174,7 @@ opal_convertor_raw( opal_convertor_t* pConvertor, source_base += offset; for(size_t i = MIN(count_desc, *iov_count - index); i > 0; i--, index++ ) { OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, end_loop->size, pConvertor->pBaseBuf, - pConvertor->pDesc, pConvertor->count ); + pConvertor->pDesc, pConvertor->count ); iov[index].iov_base = (IOVBASE_TYPE *) source_base; iov[index].iov_len = end_loop->size; source_base += pElem->loop.extent; @@ -198,14 +198,14 @@ opal_convertor_raw( opal_convertor_t* pConvertor, PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc, pStack->disp + local_disp); pos_desc++; - update_loop_description: /* update the current state */ + update_loop_description: /* update the current state */ source_base = pConvertor->pBaseBuf + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" ); continue; } } -complete_loop: + complete_loop: pConvertor->bConverted += raw_data; /* update the already converted bytes */ *length = raw_data; *iov_count = index; diff --git a/opal/datatype/opal_datatype_dump.c b/opal/datatype/opal_datatype_dump.c index 4c26292b8be..7782a805d0a 100644 --- a/opal/datatype/opal_datatype_dump.c +++ b/opal/datatype/opal_datatype_dump.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2017 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -64,7 +64,7 @@ int opal_datatype_dump_data_flags( unsigned short usflags, char* ptr, size_t len int index = 0; if( length < 22 ) return 0; index = snprintf( ptr, 22, "-----------[---][---]" ); /* set everything to - */ - if( usflags & OPAL_DATATYPE_FLAG_COMMITTED ) ptr[1] = 'c'; + if( usflags & OPAL_DATATYPE_FLAG_COMMITTED ) ptr[1] = 'c'; if( usflags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) ptr[2] = 'C'; if( usflags & OPAL_DATATYPE_FLAG_OVERLAP ) ptr[3] = 'o'; if( usflags & OPAL_DATATYPE_FLAG_USER_LB ) ptr[4] = 'l'; @@ -90,17 +90,17 @@ int opal_datatype_dump_data_desc( dt_elem_desc_t* pDesc, int nbElems, char* ptr, index += snprintf( ptr + index, length - index, "%15s ", opal_datatype_basicDatatypes[pDesc->elem.common.type]->name ); if( length <= (size_t)index ) break; if( OPAL_DATATYPE_LOOP == pDesc->elem.common.type ) - index += snprintf( ptr + index, length - index, "%d times the next %d elements extent %d\n", - (int)pDesc->loop.loops, (int)pDesc->loop.items, - (int)pDesc->loop.extent ); + index += snprintf( ptr + index, length - index, "%u times the next %u elements extent %td\n", + pDesc->loop.loops, pDesc->loop.items, + pDesc->loop.extent ); else if( OPAL_DATATYPE_END_LOOP == pDesc->elem.common.type ) - index += snprintf( ptr + index, length - index, "prev %d elements first elem displacement %ld size of data %d\n", - (int)pDesc->end_loop.items, (long)pDesc->end_loop.first_elem_disp, - (int)pDesc->end_loop.size ); + index += snprintf( ptr + index, length - index, "prev %u elements first elem displacement %td size of data %" PRIsize_t "\n", + pDesc->end_loop.items, pDesc->end_loop.first_elem_disp, + pDesc->end_loop.size ); else - index += snprintf( ptr + index, length - index, "count %" PRIsize_t " disp 0x%lx (%ld) blen %d extent %ld (size %ld)\n", - pDesc->elem.count, (long)pDesc->elem.disp, (long)pDesc->elem.disp, (int)pDesc->elem.blocklen, - pDesc->elem.extent, (long)(pDesc->elem.count * 
opal_datatype_basicDatatypes[pDesc->elem.common.type]->size) ); + index += snprintf( ptr + index, length - index, "count %" PRIsize_t " disp 0x%tx (%td) blen %u extent %td (size %zd)\n", + pDesc->elem.count, pDesc->elem.disp, pDesc->elem.disp, pDesc->elem.blocklen, + pDesc->elem.extent, (pDesc->elem.count * pDesc->elem.blocklen * opal_datatype_basicDatatypes[pDesc->elem.common.type]->size) ); pDesc++; if( length <= (size_t)index ) break; @@ -118,13 +118,13 @@ void opal_datatype_dump( const opal_datatype_t* pData ) length = pData->opt_desc.used + pData->desc.used; length = length * 100 + 500; buffer = (char*)malloc( length ); - index += snprintf( buffer, length - index, "Datatype %p[%s] size %ld align %d id %d length %d used %d\n" - "true_lb %ld true_ub %ld (true_extent %ld) lb %ld ub %ld (extent %ld)\n" - "nbElems %" PRIsize_t " loops %d flags %X (", - (void*)pData, pData->name, (long)pData->size, (int)pData->align, pData->id, (int)pData->desc.length, (int)pData->desc.used, - (long)pData->true_lb, (long)pData->true_ub, (long)(pData->true_ub - pData->true_lb), - (long)pData->lb, (long)pData->ub, (long)(pData->ub - pData->lb), - pData->nbElems, (int)pData->loops, (int)pData->flags ); + index += snprintf( buffer, length - index, "Datatype %p[%s] size %" PRIsize_t " align %u id %u length %" PRIsize_t " used %" PRIsize_t "\n" + "true_lb %td true_ub %td (true_extent %td) lb %td ub %td (extent %td)\n" + "nbElems %" PRIsize_t " loops %u flags %X (", + (void*)pData, pData->name, pData->size, pData->align, (uint32_t)pData->id, pData->desc.length, pData->desc.used, + pData->true_lb, pData->true_ub, pData->true_ub - pData->true_lb, + pData->lb, pData->ub, pData->ub - pData->lb, + pData->nbElems, pData->loops, (int)pData->flags ); /* dump the flags */ if( pData->flags == OPAL_DATATYPE_FLAG_PREDEFINED ) index += snprintf( buffer + index, length - index, "predefined " ); diff --git a/test/datatype/ddt_raw.c b/test/datatype/ddt_raw.c index de35d6b83f4..bba285ceea0 100644 --- 
a/test/datatype/ddt_raw.c +++ b/test/datatype/ddt_raw.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -71,7 +71,7 @@ static int test_upper( unsigned int length ) iov_count = 5; max_data = 0; opal_convertor_raw( pConv, iov, &iov_count, &max_data ); - i -= max_data; + i -= max_data; } GET_TIME( end ); total_time = ELAPSED_TIME( start, end ); @@ -85,12 +85,12 @@ static int test_upper( unsigned int length ) } /** - * Conversion function. They deal with data-types in 3 ways, always making local copies. + * Conversion functions. They deal with datatypes in 3 ways, always making local copies. * In order to allow performance testings, there are 3 functions: * - one copying directly from one memory location to another one using the - * data-type copy function. - * - one which use a 2 convertors created with the same data-type - * - and one using 2 convertors created from different data-types. + * datatype copy function. + * - one which uses 2 convertors created with the same datatype + * - and one using 2 convertors created from different datatypes.
* */ static int local_copy_ddt_raw( ompi_datatype_t* pdt, int count, int iov_num ) @@ -114,13 +114,13 @@ static int local_copy_ddt_raw( ompi_datatype_t* pdt, int count, int iov_num ) GET_TIME( start ); while( 0 == opal_convertor_raw(convertor, iov, &iov_count, &max_data) ) { #if 0 - printf( "New raw extraction (iov_count = %d, max_data = %zu)\n", - iov_count, max_data ); - for( i = 0; i < iov_count; i++ ) { - printf( "\t{%p, %d}\n", iov[i].iov_base, iov[i].iov_len ); - } + printf( "New raw extraction (iov_count = %d, max_data = %zu)\n", + iov_count, max_data ); + for( i = 0; i < iov_count; i++ ) { + printf( "\t{%p, %d}\n", iov[i].iov_base, iov[i].iov_len ); + } #endif - remaining_length -= max_data; + remaining_length -= max_data; iov_count = iov_num; } remaining_length -= max_data; @@ -129,19 +129,23 @@ static int local_copy_ddt_raw( ompi_datatype_t* pdt, int count, int iov_num ) printf( "raw extraction in %ld microsec\n", total_time ); OBJ_RELEASE( convertor ); if( remaining_length != 0 ) { - printf( "Not all raw description was been extracted (%lu bytes missing)\n", - (unsigned long) remaining_length ); + printf( "Not all raw description was been extracted (%lu bytes missing)\n", + (unsigned long) remaining_length ); } free(iov); return OMPI_SUCCESS; } /** - * Main function. Call several tests and print-out the results. It try to stress the convertor - * using difficult data-type constructions as well as strange segment sizes for the conversion. - * Usually, it is able to detect most of the data-type and convertor problems. Any modifications - * on the data-type engine should first pass all the tests from this file, before going into other - * tests. + * Go over a set of datatypes and copy them using the raw functionality provided by the + * convertor. The goal of this test is to stress the convertor using several more or less + * difficult datatypes, with a large set of segment sizes for the conversion.
It can be used + * to highlight the raw capability of the convertor as well as detecting datatype convertor + * problems. + * + * This test is part of the testing infrastructure for the core datatype engine. As such any + * modifications on the datatype engine should first pass all the tests from this file, + * before going into other tests. */ int main( int argc, char* argv[] ) { @@ -226,7 +230,7 @@ int main( int argc, char* argv[] ) OBJ_RELEASE( pdt3 ); assert( pdt3 == NULL ); printf( ">>--------------------------------------------<<\n" ); - printf( " Contiguous data-type (MPI_DOUBLE)\n" ); + printf( " Contiguous datatype (MPI_DOUBLE)\n" ); pdt = MPI_DOUBLE; if( outputFlags & CHECK_PACK_UNPACK ) { local_copy_ddt_raw(pdt, 4500, iov_num); @@ -235,37 +239,37 @@ int main( int argc, char* argv[] ) printf( ">>--------------------------------------------<<\n" ); if( outputFlags & CHECK_PACK_UNPACK ) { - printf( "Contiguous multiple data-type (4500*1)\n" ); + printf( "Contiguous multiple datatype (4500*1)\n" ); pdt = create_contiguous_type( MPI_DOUBLE, 4500 ); local_copy_ddt_raw(pdt, 1, iov_num); OBJ_RELEASE( pdt ); assert( pdt == NULL ); - printf( "Contiguous multiple data-type (450*10)\n" ); + printf( "Contiguous multiple datatype (450*10)\n" ); pdt = create_contiguous_type( MPI_DOUBLE, 450 ); local_copy_ddt_raw(pdt, 10, iov_num); OBJ_RELEASE( pdt ); assert( pdt == NULL ); - printf( "Contiguous multiple data-type (45*100)\n" ); + printf( "Contiguous multiple datatype (45*100)\n" ); pdt = create_contiguous_type( MPI_DOUBLE, 45 ); local_copy_ddt_raw(pdt, 100, iov_num); OBJ_RELEASE( pdt ); assert( pdt == NULL ); - printf( "Contiguous multiple data-type (100*45)\n" ); + printf( "Contiguous multiple datatype (100*45)\n" ); pdt = create_contiguous_type( MPI_DOUBLE, 100 ); local_copy_ddt_raw(pdt, 45, iov_num); OBJ_RELEASE( pdt ); assert( pdt == NULL ); - printf( "Contiguous multiple data-type (10*450)\n" ); + printf( "Contiguous multiple datatype (10*450)\n" ); pdt = 
create_contiguous_type( MPI_DOUBLE, 10 ); local_copy_ddt_raw(pdt, 450, iov_num); OBJ_RELEASE( pdt ); assert( pdt == NULL ); - printf( "Contiguous multiple data-type (1*4500)\n" ); + printf( "Contiguous multiple datatype (1*4500)\n" ); pdt = create_contiguous_type( MPI_DOUBLE, 1 ); local_copy_ddt_raw(pdt, 4500, iov_num); OBJ_RELEASE( pdt ); assert( pdt == NULL ); } printf( ">>--------------------------------------------<<\n" ); printf( ">>--------------------------------------------<<\n" ); - printf( "Vector data-type (450 times 10 double stride 11)\n" ); + printf( "Vector datatype (450 times 10 double stride 11)\n" ); pdt = create_vector_type( MPI_DOUBLE, 450, 10, 11 ); if( outputFlags & DUMP_DATA_AFTER_COMMIT ) { - ompi_datatype_dump( pdt ); + ompi_datatype_dump( pdt ); } if( outputFlags & CHECK_PACK_UNPACK ) { local_copy_ddt_raw(pdt, 1, iov_num); @@ -292,9 +296,9 @@ int main( int argc, char* argv[] ) printf( ">>--------------------------------------------<<\n" ); pdt = test_create_blacs_type(); if( outputFlags & CHECK_PACK_UNPACK ) { - if( outputFlags & DUMP_DATA_AFTER_COMMIT ) { - ompi_datatype_dump( pdt ); - } + if( outputFlags & DUMP_DATA_AFTER_COMMIT ) { + ompi_datatype_dump( pdt ); + } local_copy_ddt_raw(pdt, 4500, iov_num); } printf( ">>--------------------------------------------<<\n" ); From 4cdc2155e540d13b3145aa5cc095f4d7282c072d Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 20 May 2019 11:39:16 -0400 Subject: [PATCH 50/94] Optimize the raw representation. Merge contiguous iov in order to minimize the number of returned iovec. 
Signed-off-by: George Bosilca --- opal/datatype/opal_convertor_raw.c | 175 ++++++++++++++++++----------- 1 file changed, 109 insertions(+), 66 deletions(-) diff --git a/opal/datatype/opal_convertor_raw.c b/opal/datatype/opal_convertor_raw.c index 3c2073155b2..df2340122a9 100644 --- a/opal/datatype/opal_convertor_raw.c +++ b/opal/datatype/opal_convertor_raw.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. @@ -30,6 +30,29 @@ #define DO_DEBUG(INST) #endif /* OPAL_ENABLE_DEBUG */ +/* Take a new iovec (base + len) and try to merge it with what we already + * have. If we succeed return 0 and move forward, if not save it into a new + * iovec location. If we need to go to a new position and we reach the end + * of the iovec array, return 1 to signal we did not save the last iovec. + */ +static inline int +opal_convertor_merge_iov( struct iovec* iov, uint32_t* iov_count, + IOVBASE_TYPE* base, size_t len, + uint32_t* idx ) +{ + if( 0 != iov[*idx].iov_len ) { + if( (base == ((char*)iov[*idx].iov_base + iov[*idx].iov_len)) ) { + iov[*idx].iov_len += len; /* merge with previous iovec */ + return 0; + } /* cannot merge, move to the next position */ + *idx = *idx + 1; + if( *idx == *iov_count ) return 1; /* do not overwrite outside the iovec array boundaries */ + } + iov[*idx].iov_base = base; + iov[*idx].iov_len = len; + return 0; +} + /** * This function always work in local representation. This means no representation * conversion (i.e.
no heterogeneity) is taken into account, and that all @@ -44,10 +67,11 @@ opal_convertor_raw( opal_convertor_t* pConvertor, dt_stack_t* pStack; /* pointer to the position on the stack */ uint32_t pos_desc; /* actual position in the description of the derived datatype */ size_t count_desc; /* the number of items already done in the actual pos_desc */ + size_t do_now, blength; dt_elem_desc_t* description, *pElem; unsigned char *source_base; /* origin of the data */ - size_t raw_data = 0; /* sum of raw data lengths in the iov_len fields */ - uint32_t index = 0; /* the iov index and a simple counter */ + size_t sum_iov_len = 0; /* sum of raw data lengths in the iov_len fields */ + uint32_t index = 0; /* the iov index and a simple counter */ assert( (*iov_count) > 0 ); if( OPAL_LIKELY(pConvertor->flags & CONVERTOR_COMPLETED) ) { @@ -87,64 +111,86 @@ opal_convertor_raw( opal_convertor_t* pConvertor, pStack--; pConvertor->stack_pos--; pElem = &(description[pos_desc]); - source_base += pStack->disp; + DO_DEBUG( opal_output( 0, "raw start pos_desc %d count_desc %" PRIsize_t " disp %ld\n" "stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n", pos_desc, count_desc, (long)(source_base - pConvertor->pBaseBuf), pConvertor->stack_pos, pStack->index, pStack->count, (long)pStack->disp ); ); + + iov[index].iov_len = 0; + /* Special case if we start from a position that is in the middle of a data element blocklen. + * We can treat this outside the loop as it is an exception that can only happen once, + * and will simplify the loop handling. 
+ */ + if( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { + const ddt_elem_desc_t* current = &(pElem->elem); + + if( count_desc != (current->count * current->blocklen) ) { /* Not the full element description */ + do_now = current->blocklen - (count_desc % current->blocklen); /* how much left in the block */ + if( do_now ) { + source_base += current->disp; + blength = do_now * opal_datatype_basicDatatypes[current->common.type]->size; + OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, blength, pConvertor->pBaseBuf, + pConvertor->pDesc, pConvertor->count ); + DO_DEBUG( opal_output( 0, "raw 1. iov[%d] = {base %p, length %" PRIsize_t "}\n", + index, (void*)source_base, blength ); ); + opal_convertor_merge_iov( iov, iov_count, + (IOVBASE_TYPE *) source_base, blength, &index ); + /* no need to check the return value, we know there was at least one element in the iovec */ + sum_iov_len += blength; + count_desc -= do_now; + + source_base += (current->extent - current->disp + + (current->blocklen - do_now) * opal_datatype_basicDatatypes[current->common.type]->size); + } + } + } + while( 1 ) { while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { - size_t blength = opal_datatype_basicDatatypes[pElem->elem.common.type]->size; - source_base += pElem->elem.disp; - if( blength == (size_t)pElem->elem.extent ) { /* no resized data */ - if( index < *iov_count ) { - blength *= count_desc; - /* now here we have a basic datatype */ - OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, blength, pConvertor->pBaseBuf, - pConvertor->pDesc, pConvertor->count ); - DO_DEBUG( opal_output( 0, "raw 1. 
iov[%d] = {base %p, length %" PRIsize_t "}\n", - index, (void*)source_base, blength ); ); - iov[index].iov_base = (IOVBASE_TYPE *) source_base; - iov[index].iov_len = blength; - source_base += blength; - raw_data += blength; - index++; - count_desc = 0; - } - } else { - for(size_t i = count_desc; (i > 0) && (index < *iov_count); i--, index++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, blength, pConvertor->pBaseBuf, - pConvertor->pDesc, pConvertor->count ); - DO_DEBUG( opal_output( 0, "raw 2. iov[%d] = {base %p, length %" PRIsize_t "}\n", - index, (void*)source_base, blength ); ); - iov[index].iov_base = (IOVBASE_TYPE *) source_base; - iov[index].iov_len = blength; - source_base += pElem->elem.extent; - raw_data += blength; - count_desc--; - } + const ddt_elem_desc_t* current = &(pElem->elem); + source_base += current->disp; + + do_now = current->count; + if( count_desc != (current->count * current->blocklen) ) { + do_now = count_desc / current->blocklen; + assert( 0 == (count_desc % current->blocklen) ); } - source_base -= pElem->elem.disp; + + blength = current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size; + for(size_t _i = 0; _i < do_now; _i++ ) { + OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, blength, pConvertor->pBaseBuf, + pConvertor->pDesc, pConvertor->count ); + DO_DEBUG( opal_output( 0, "raw 2. 
iov[%d] = {base %p, length %" PRIsize_t "}\n", + index, (void*)source_base, blength ); ); + if( opal_convertor_merge_iov( iov, iov_count, + (IOVBASE_TYPE *) source_base, blength, &index ) ) + break; /* no more iovec available, bail out */ + + source_base += current->extent; + sum_iov_len += blength; + count_desc -= current->blocklen; + } + if( 0 == count_desc ) { /* completed */ source_base = pConvertor->pBaseBuf + pStack->disp; pos_desc++; /* advance to the next data */ UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); continue; } + source_base -= current->disp; goto complete_loop; } if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ DO_DEBUG( opal_output( 0, "raw end_loop count %" PRIsize_t " stack_pos %d" - " pos_desc %d disp %ld space %lu\n", + " pos_desc %d disp %ld space %" PRIsize_t "\n", pStack->count, pConvertor->stack_pos, - pos_desc, (long)pStack->disp, (unsigned long)raw_data ); ); + pos_desc, (long)pStack->disp, sum_iov_len ); ); if( --(pStack->count) == 0 ) { /* end of loop */ - if( pConvertor->stack_pos == 0 ) { - /* we lie about the size of the next element in order to - * make sure we exit the main loop. - */ - *iov_count = index; - goto complete_loop; /* completed */ + if( 0 == pConvertor->stack_pos ) { + /* we're done. 
Force the exit of the main for loop (around iovec) */ + index++; /* account for the currently updating iovec */ + goto complete_loop; } pConvertor->stack_pos--; pStack--; @@ -155,15 +201,15 @@ opal_convertor_raw( opal_convertor_t* pConvertor, pStack->disp += (pData->ub - pData->lb); } else { assert( OPAL_DATATYPE_LOOP == description[pStack->index].loop.common.type ); - pStack->disp += description[pStack->index].loop.extent; + pStack->disp += description[pStack->index].loop.extent; /* jump by the loop extent */ } } source_base = pConvertor->pBaseBuf + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); DO_DEBUG( opal_output( 0, "raw new_loop count %" PRIsize_t " stack_pos %d " - "pos_desc %d disp %ld space %lu\n", + "pos_desc %d disp %ld space %" PRIsize_t "\n", pStack->count, pConvertor->stack_pos, - pos_desc, (long)pStack->disp, (unsigned long)raw_data ); ); + pos_desc, (long)pStack->disp, sum_iov_len ); ); } if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { ptrdiff_t local_disp = (ptrdiff_t)source_base; @@ -172,42 +218,39 @@ opal_convertor_raw( opal_convertor_t* pConvertor, if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { ptrdiff_t offset = end_loop->first_elem_disp; source_base += offset; - for(size_t i = MIN(count_desc, *iov_count - index); i > 0; i--, index++ ) { + for(; count_desc > 0; ) { OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, end_loop->size, pConvertor->pBaseBuf, pConvertor->pDesc, pConvertor->count ); - iov[index].iov_base = (IOVBASE_TYPE *) source_base; - iov[index].iov_len = end_loop->size; + if( opal_convertor_merge_iov( iov, iov_count, + (IOVBASE_TYPE *) source_base, end_loop->size, &index ) ) { + source_base -= offset; + goto complete_loop; + } + source_base += pElem->loop.extent; - raw_data += end_loop->size; + sum_iov_len += end_loop->size; count_desc--; DO_DEBUG( opal_output( 0, "raw contig loop generate iov[%d] = {base %p, length %" PRIsize_t "}" - "space %lu [pos_desc %d]\n", + "space %" 
PRIsize_t " [pos_desc %d]\n", index, iov[index].iov_base, iov[index].iov_len, - (unsigned long)raw_data, pos_desc ); ); + sum_iov_len, pos_desc ); ); } source_base -= offset; - if( 0 == count_desc ) { /* completed */ - pos_desc += pElem->loop.items + 1; - goto update_loop_description; - } - } - if( index == *iov_count ) { /* all iov have been filled, we need to bail out */ - goto complete_loop; + pos_desc += pElem->loop.items + 1; + } else { + local_disp = (ptrdiff_t)source_base - local_disp; + PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc, + pStack->disp + local_disp); + pos_desc++; } - local_disp = (ptrdiff_t)source_base - local_disp; - PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc, - pStack->disp + local_disp); - pos_desc++; - update_loop_description: /* update the current state */ source_base = pConvertor->pBaseBuf + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" ); - continue; } } complete_loop: - pConvertor->bConverted += raw_data; /* update the already converted bytes */ - *length = raw_data; + pConvertor->bConverted += sum_iov_len; /* update the already converted bytes */ + *length = sum_iov_len; *iov_count = index; if( pConvertor->bConverted == pConvertor->local_size ) { pConvertor->flags |= CONVERTOR_COMPLETED; From 0a00b02e4882cc0cf612128a715073ea3f9ce688 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Sat, 18 May 2019 19:31:24 -0400 Subject: [PATCH 51/94] Small improvements on the test. Rework the to_self test to be able to be used as a benchmark. 
Signed-off-by: George Bosilca --- opal/datatype/opal_convertor.c | 2 +- opal/datatype/opal_datatype_optimize.c | 13 +- test/datatype/ddt_raw2.c | 29 +-- test/datatype/opal_datatype_test.c | 3 +- test/datatype/opal_ddt_lib.c | 4 +- test/datatype/to_self.c | 348 ++++++++++++++++--------- test/datatype/unpack_ooo.c | 21 +- 7 files changed, 267 insertions(+), 153 deletions(-) diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index 331cb95a715..7a449302bff 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -357,7 +357,7 @@ opal_convertor_create_stack_with_pos_contig( opal_convertor_t* pConvertor, */ if( OPAL_LIKELY(0 == count) ) { pStack[1].type = pElems->elem.common.type; - pStack[1].count = pElems->elem.count; + pStack[1].count = pElems->elem.blocklen; } else { pStack[1].type = OPAL_DATATYPE_UINT1; pStack[1].count = pData->size - count; diff --git a/opal/datatype/opal_datatype_optimize.c b/opal/datatype/opal_datatype_optimize.c index 48ea0f3c78b..fbaacb592c2 100644 --- a/opal/datatype/opal_datatype_optimize.c +++ b/opal/datatype/opal_datatype_optimize.c @@ -167,15 +167,18 @@ opal_datatype_optimize_short( opal_datatype_t* pData, if( ((last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) == (current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size)) && (current->disp == (last.disp + (ptrdiff_t)last.count * last.extent)) && - ((last.count == 1) || (current->count == 1) || (last.extent == current->extent)) ) { + ((current->count == 1) || (last.extent == current->extent)) ) { last.count += current->count; - if( last.count == 1 ) { - last.extent = current->extent; - } /* otherwise keep the last.extent */ /* find the lowest common denomitaor type */ if( last.common.type != current->common.type ) { - last.common.type = OPAL_DATATYPE_UINT1; last.blocklen *= opal_datatype_basicDatatypes[last.common.type]->size; + last.common.type = OPAL_DATATYPE_UINT1; + } + /* maximize the 
contiguous pieces */ + if( last.extent == (ptrdiff_t)(last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) ) { + last.blocklen *= last.count; + last.count = 1; + last.extent = last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size; } continue; /* next data */ } diff --git a/test/datatype/ddt_raw2.c b/test/datatype/ddt_raw2.c index cc78e23006a..7e91a323f7a 100644 --- a/test/datatype/ddt_raw2.c +++ b/test/datatype/ddt_raw2.c @@ -33,9 +33,6 @@ mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype, uint32_t *iovec_count, int increment) { - - - opal_convertor_t *convertor; size_t remaining_length = 0; uint32_t i; @@ -43,7 +40,6 @@ mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype, struct iovec *temp_iov=NULL; size_t temp_data; - convertor = opal_convertor_create( opal_local_arch, 0 ); if (OMPI_SUCCESS != opal_convertor_prepare_for_send (convertor, @@ -55,9 +51,9 @@ mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype, } if ( 0 == datatype->super.size ) { - *iovec_count = 0; - *iov = NULL; - return OMPI_SUCCESS; + *iovec_count = 0; + *iov = NULL; + return OMPI_SUCCESS; } remaining_length = count * datatype->super.size; @@ -69,10 +65,8 @@ mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype, return OMPI_ERR_OUT_OF_RESOURCE; } - while (0 == opal_convertor_raw(convertor, - temp_iov, - &temp_count, - &temp_data)) { + while (0 == opal_convertor_raw(convertor, temp_iov, + &temp_count, &temp_data)) { *iovec_count = *iovec_count + temp_count; *iov = (struct iovec *) realloc (*iov, *iovec_count * sizeof(struct iovec)); if (NULL == *iov) { @@ -80,7 +74,7 @@ mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype, free(temp_iov); return OMPI_ERR_OUT_OF_RESOURCE; } - for (i=0 ; i 0 ) { - *iov = (struct iovec *) realloc (*iov, *iovec_count * sizeof(struct iovec)); - if (NULL == *iov) { - opal_output(1, "OUT OF MEMORY\n"); + *iov = (struct iovec *) realloc (*iov, *iovec_count * sizeof(struct iovec)); + if (NULL == 
*iov) { + opal_output(1, "OUT OF MEMORY\n"); free(temp_iov); - return OMPI_ERR_OUT_OF_RESOURCE; - } + return OMPI_ERR_OUT_OF_RESOURCE; + } } for (i=0 ; idesc.used + 2 ); - if( (bLength == stride) || (1 >= count) ) { /* the elements are contiguous */ + if( (bLength == stride) || (1 == count) ) { /* the elements are contiguous */ opal_datatype_add( pData, oldType, count * bLength, 0, extent ); } else { if( 1 == bLength ) { @@ -476,7 +476,7 @@ static int32_t opal_datatype_create_hvector( int count, int bLength, ptrdiff_t s } pTempData = opal_datatype_create( oldType->desc.used + 2 ); - if( ((extent * bLength) == stride) || (1 >= count) ) { /* contiguous */ + if( ((extent * bLength) == stride) || (1 == count) ) { /* contiguous */ pData = pTempData; opal_datatype_add( pData, oldType, count * bLength, 0, extent ); } else { diff --git a/test/datatype/to_self.c b/test/datatype/to_self.c index 58849f5e90c..073fe4f0b57 100644 --- a/test/datatype/to_self.c +++ b/test/datatype/to_self.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* $COPYRIGHT$ @@ -15,8 +15,9 @@ #include #include #include +#include -#if OPEN_MPI && 0 +#if 0 && OPEN_MPI extern void ompi_datatype_dump( MPI_Datatype ddt ); #define MPI_DDT_DUMP(ddt) ompi_datatype_dump( (ddt) ) #else @@ -178,23 +179,145 @@ create_indexed_gap_optimized_ddt( void ) return dt3; } -static void print_result( int length, int cycles, double time ) -{ - double bandwidth, clock_prec; +/******************************************************************** + *******************************************************************/ + +#define DO_CONTIG 0x00000001 +#define DO_CONSTANT_GAP 0x00000002 +#define DO_INDEXED_GAP 0x00000004 +#define DO_OPTIMIZED_INDEXED_GAP 0x00000008 +#define DO_STRUCT_CONSTANT_GAP_RESIZED 0x00000010 + +#define DO_PACK 0x01000000 +#define DO_UNPACK 0x02000000 +#define DO_ISEND_RECV 0x04000000 +#define DO_ISEND_IRECV 0x08000000 +#define DO_IRECV_SEND 0x10000000 +#define DO_IRECV_ISEND 0x20000000 + +#define MIN_LENGTH 1024 +#define MAX_LENGTH (1024*1024) + +static int cycles = 100; +static int trials = 20; +static int warmups = 2; + +static void print_result( int length, int trials, double* timers ) +{ + double bandwidth, clock_prec, temp; + double min_time, max_time, average, std_dev = 0.0; + double ordered[trials]; + int t, pos, quartile_start, quartile_end; + + for( t = 0; t < trials; ordered[t] = timers[t], t++ ); + for( t = 0; t < trials-1; t++ ) { + temp = ordered[t]; + pos = t; + for( int i = t+1; i < trials; i++ ) { + if( temp > ordered[i] ) { + temp = ordered[i]; + pos = i; + } + } + if( pos != t ) { + temp = ordered[t]; + ordered[t] = ordered[pos]; + ordered[pos] = temp; + } + } + quartile_start = trials - (3 * trials) / 4; + quartile_end = trials - (1 * trials) / 4; clock_prec = MPI_Wtick(); - bandwidth = (length * clock_prec * cycles) / (1024.0 * 1024.0) / (time * clock_prec); - printf( "%8d\t%.6f\t%.4f MB/s\n", length, time / cycles, bandwidth ); + min_time = ordered[quartile_start]; + max_time = ordered[quartile_start]; + 
average = ordered[quartile_start]; + for( t = quartile_start + 1; t < quartile_end; t++ ) { + if( min_time > ordered[t] ) min_time = ordered[t]; + if( max_time < ordered[t] ) max_time = ordered[t]; + average += ordered[t]; + } + average /= (quartile_end - quartile_start); + for( t = quartile_start; t < quartile_end; t++ ) { + std_dev += (ordered[t] - average) * (ordered[t] - average); + } + std_dev = sqrt( std_dev/(quartile_end - quartile_start) ); + + bandwidth = (length * clock_prec) / (1024.0 * 1024.0) / (average * clock_prec); + printf( "%8d\t%15g\t%10.4f MB/s [min %10g max %10g std %2.2f%%]\n", length, average, bandwidth, + min_time, max_time, (100.0 * std_dev) / average ); +} + +static int pack( int cycles, + MPI_Datatype sdt, int scount, void* sbuf, + void* packed_buf ) +{ + int position, myself, c, t, outsize; + double timers[trials]; + + MPI_Type_size( sdt, &outsize ); + outsize *= scount; + + MPI_Comm_rank( MPI_COMM_WORLD, &myself ); + + for( t = 0; t < warmups; t++ ) { + for( c = 0; c < cycles; c++ ) { + position = 0; + MPI_Pack(sbuf, scount, sdt, packed_buf, outsize, &position, MPI_COMM_WORLD); + } + } + + for( t = 0; t < trials; t++ ) { + timers[t] = MPI_Wtime(); + for( c = 0; c < cycles; c++ ) { + position = 0; + MPI_Pack(sbuf, scount, sdt, packed_buf, outsize, &position, MPI_COMM_WORLD); + } + timers[t] = (MPI_Wtime() - timers[t]) / cycles; + } + print_result( outsize, trials, timers ); + return 0; +} + +static int unpack( int cycles, + void* packed_buf, + MPI_Datatype rdt, int rcount, void* rbuf ) +{ + int position, myself, c, t, insize; + double timers[trials]; + + MPI_Type_size( rdt, &insize ); + insize *= rcount; + + MPI_Comm_rank( MPI_COMM_WORLD, &myself ); + + for( t = 0; t < warmups; t++ ) { + for( c = 0; c < cycles; c++ ) { + position = 0; + MPI_Unpack(packed_buf, insize, &position, rbuf, rcount, rdt, MPI_COMM_WORLD); + } + } + + for( t = 0; t < trials; t++ ) { + timers[t] = MPI_Wtime(); + for( c = 0; c < cycles; c++ ) { + position = 0; + 
MPI_Unpack(packed_buf, insize, &position, rbuf, rcount, rdt, MPI_COMM_WORLD); + } + timers[t] = (MPI_Wtime() - timers[t]) / cycles; + } + print_result( insize, trials, timers ); + return 0; } static int isend_recv( int cycles, MPI_Datatype sdt, int scount, void* sbuf, MPI_Datatype rdt, int rcount, void* rbuf ) { - int myself, tag = 0, i, slength, rlength; + int myself, tag = 0, c, t, slength, rlength; MPI_Status status; MPI_Request req; - double tstart, tend; + double timers[trials]; MPI_Type_size( sdt, &slength ); slength *= scount; @@ -203,21 +326,16 @@ static int isend_recv( int cycles, MPI_Comm_rank( MPI_COMM_WORLD, &myself ); - tstart = MPI_Wtime(); - for( i = 0; i < cycles; i++ ) { -#ifndef FAST - MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &req ); - MPI_Recv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &status ); - MPI_Wait( &req, &status ); - /*MPI_Request_free( &req );*/ -#else - ftmpi_mpi_isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &req ); - ftmpi_mpi_recv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &status ); - ftmpi_request_free( &req ); -#endif + for( t = 0; t < trials; t++ ) { + timers[t] = MPI_Wtime(); + for( c = 0; c < cycles; c++ ) { + MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &req ); + MPI_Recv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &status ); + MPI_Wait( &req, &status ); + } + timers[t] = (MPI_Wtime() - timers[t]) / cycles; } - tend = MPI_Wtime(); - print_result( rlength, cycles, tend - tstart ); + print_result( rlength, trials, timers ); return 0; } @@ -225,10 +343,10 @@ static int irecv_send( int cycles, MPI_Datatype sdt, int scount, void* sbuf, MPI_Datatype rdt, int rcount, void* rbuf ) { - int myself, tag = 0, i, slength, rlength; + int myself, tag = 0, c, t, slength, rlength; MPI_Request req; MPI_Status status; - double tstart, tend; + double timers[trials]; MPI_Type_size( sdt, &slength ); slength *= scount; @@ -237,21 +355,16 @@ static int irecv_send( int cycles, MPI_Comm_rank( 
MPI_COMM_WORLD, &myself ); - tstart = MPI_Wtime(); - for( i = 0; i < cycles; i++ ) { -#ifndef FAST - MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &req ); - MPI_Send( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD ); - MPI_Wait( &req, &status ); - /*MPI_Request_free( &req );*/ -#else - ftmpi_mpi_irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &req ); - ftmpi_mpi_send( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD ); - ftmpi_request_free( &req ); -#endif + for( t = 0; t < trials; t++ ) { + timers[t] = MPI_Wtime(); + for( c = 0; c < cycles; c++ ) { + MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &req ); + MPI_Send( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD ); + MPI_Wait( &req, &status ); + } + timers[t] = (MPI_Wtime() - timers[t]) / cycles; } - tend = MPI_Wtime(); - print_result( rlength, cycles, tend - tstart ); + print_result( rlength, trials, timers ); return 0; } @@ -259,10 +372,10 @@ static int isend_irecv_wait( int cycles, MPI_Datatype sdt, int scount, void* sbuf, MPI_Datatype rdt, int rcount, void* rbuf ) { - int myself, tag = 0, i, slength, rlength; - MPI_Request sreq, rreq; - MPI_Status status; - double tstart, tend; + int myself, tag = 0, c, t, slength, rlength; + MPI_Request requests[2]; + MPI_Status statuses[2]; + double timers[trials]; MPI_Type_size( sdt, &slength ); slength *= scount; @@ -271,25 +384,16 @@ static int isend_irecv_wait( int cycles, MPI_Comm_rank( MPI_COMM_WORLD, &myself ); - tstart = MPI_Wtime(); - for( i = 0; i < cycles; i++ ) { -#ifndef FAST - MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq ); - MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq ); - MPI_Wait( &sreq, &status ); - MPI_Wait( &rreq, &status ); - /*MPI_Request_free( &sreq );*/ - /*MPI_Request_free( &rreq );*/ -#else - ftmpi_mpi_isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq ); - ftmpi_mpi_irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq ); - ftmpi_wait( &sreq, &status ); - 
ftmpi_request_free( &sreq ); - ftmpi_request_free( &rreq ); -#endif + for( t = 0; t < trials; t++ ) { + timers[t] = MPI_Wtime(); + for( c = 0; c < cycles; c++ ) { + MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &requests[0] ); + MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &requests[1] ); + MPI_Waitall( 2, requests, statuses ); + } + timers[t] = (MPI_Wtime() - timers[t]) / cycles; } - tend = MPI_Wtime(); - print_result( rlength, cycles, tend - tstart ); + print_result( rlength, trials, timers ); return 0; } @@ -297,10 +401,10 @@ static int irecv_isend_wait( int cycles, MPI_Datatype sdt, int scount, void* sbuf, MPI_Datatype rdt, int rcount, void* rbuf ) { - int myself, tag = 0, i, slength, rlength; - MPI_Request sreq, rreq; - MPI_Status status; - double tstart, tend; + int myself, tag = 0, c, t, slength, rlength; + MPI_Request requests[2]; + MPI_Status statuses[2]; + double timers[trials]; MPI_Type_size( sdt, &slength ); slength *= scount; @@ -309,74 +413,82 @@ static int irecv_isend_wait( int cycles, MPI_Comm_rank( MPI_COMM_WORLD, &myself ); - tstart = MPI_Wtime(); - for( i = 0; i < cycles; i++ ) { -#ifndef FAST - MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq ); - MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq ); - MPI_Wait( &sreq, &status ); - MPI_Wait( &rreq, &status ); - /*MPI_Request_free( &sreq );*/ - /*MPI_Request_free( &rreq );*/ -#else - ftmpi_mpi_irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq ); - ftmpi_mpi_isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq ); - ftmpi_wait( &sreq, &status ); - ftmpi_request_free( &sreq ); - ftmpi_request_free( &rreq ); -#endif + for( t = 0; t < trials; t++ ) { + timers[t] = MPI_Wtime(); + for( c = 0; c < cycles; c++ ) { + MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &requests[0] ); + MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &requests[1] ); + MPI_Waitall( 2, requests, statuses ); + } + timers[t] = 
(MPI_Wtime() - timers[t]) / cycles; } - tend = MPI_Wtime(); - print_result( rlength, cycles, tend - tstart ); + print_result( rlength, trials, timers); return 0; } -static int do_test_for_ddt( MPI_Datatype sddt, MPI_Datatype rddt, int length ) +static int do_test_for_ddt( int doop, MPI_Datatype sddt, MPI_Datatype rddt, int length ) { - int i; MPI_Aint lb, extent; char *sbuf, *rbuf; + int i; MPI_Type_get_extent( sddt, &lb, &extent ); sbuf = (char*)malloc( length ); rbuf = (char*)malloc( length ); - printf( "# Isend recv (length %d)\n", length ); - for( i = 1; i <= (length/extent); i *= 2 ) { - isend_recv( 10, sddt, i, sbuf, rddt, i, rbuf ); + if( doop & DO_PACK ) { + printf("# Pack (max length %d)\n", length); + for( i = 1; i <= (length/extent); i *= 2 ) { + pack( cycles, sddt, i, sbuf, rbuf ); + } } - printf( "# Isend Irecv Wait (length %d)\n", length ); - for( i = 1; i <= (length/extent); i *= 2 ) { - isend_irecv_wait( 10, sddt, i, sbuf, rddt, i, rbuf ); + + if( doop & DO_UNPACK ) { + printf("# Unpack (length %d)\n", length); + for( i = 1; i <= (length/extent); i *= 2 ) { + unpack( cycles, sbuf, rddt, i, rbuf ); + } } - printf( "# Irecv send (length %d)\n", length ); - for( i = 1; i <= (length/extent); i *= 2 ) { - irecv_send( 10, sddt, i, sbuf, rddt, i, rbuf ); + + if( doop & DO_ISEND_RECV ) { + printf( "# Isend recv (length %d)\n", length ); + for( i = 1; i <= (length/extent); i *= 2 ) { + isend_recv( cycles, sddt, i, sbuf, rddt, i, rbuf ); + } } - printf( "# Irecv Isend Wait (length %d)\n", length ); - for( i = 1; i <= (length/extent); i *= 2 ) { - irecv_isend_wait( 10, sddt, i, sbuf, rddt, i, rbuf ); + + if( doop & DO_ISEND_IRECV ) { + printf( "# Isend Irecv Wait (length %d)\n", length ); + for( i = 1; i <= (length/extent); i *= 2 ) { + isend_irecv_wait( cycles, sddt, i, sbuf, rddt, i, rbuf ); + } + } + + if( doop & DO_IRECV_SEND ) { + printf( "# Irecv send (length %d)\n", length ); + for( i = 1; i <= (length/extent); i *= 2 ) { + irecv_send( cycles, sddt, i, 
sbuf, rddt, i, rbuf ); + } + } + + if( doop & DO_IRECV_SEND ) { + printf( "# Irecv Isend Wait (length %d)\n", length ); + for( i = 1; i <= (length/extent); i *= 2 ) { + irecv_isend_wait( cycles, sddt, i, sbuf, rddt, i, rbuf ); + } } free( sbuf ); free( rbuf ); return 0; } -#define DO_CONTIG 0x01 -#define DO_CONSTANT_GAP 0x02 -#define DO_INDEXED_GAP 0x04 -#define DO_OPTIMIZED_INDEXED_GAP 0x08 -#define DO_STRUCT_CONSTANT_GAP_RESIZED 0x10 - -#define MIN_LENGTH 1024 -#define MAX_LENGTH (1024*1024) - int main( int argc, char* argv[] ) { - int run_tests = 0xffffffff; /* do all tests by default */ - int length, rank, size; + int run_tests = 0xffff; /* do all datatype tests by default */ + int rank, size; MPI_Datatype ddt; - /*int run_tests = DO_CONSTANT_GAP;*/ + run_tests |= DO_PACK | DO_UNPACK; + MPI_Init (&argc, &argv); MPI_Comm_rank (MPI_COMM_WORLD, &rank); @@ -389,16 +501,14 @@ int main( int argc, char* argv[] ) if( run_tests & DO_CONTIG ) { printf( "\ncontiguous datatype\n\n" ); - for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 ) - do_test_for_ddt( MPI_INT, MPI_INT, length ); + do_test_for_ddt( run_tests, MPI_INT, MPI_INT, MAX_LENGTH ); } if( run_tests & DO_INDEXED_GAP ) { printf( "\nindexed gap\n\n" ); ddt = create_indexed_gap_ddt(); MPI_DDT_DUMP( ddt ); - for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 ) - do_test_for_ddt( ddt, ddt, length ); + do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH ); MPI_Type_free( &ddt ); } @@ -406,8 +516,7 @@ int main( int argc, char* argv[] ) printf( "\noptimized indexed gap\n\n" ); ddt = create_indexed_gap_optimized_ddt(); MPI_DDT_DUMP( ddt ); - for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 ) - do_test_for_ddt( ddt, ddt, length ); + do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH ); MPI_Type_free( &ddt ); } @@ -415,8 +524,7 @@ int main( int argc, char* argv[] ) printf( "\nconstant indexed gap\n\n" ); ddt = create_indexed_constant_gap_ddt( 80, 100, 1 ); MPI_DDT_DUMP( ddt ); - for( length = 
MIN_LENGTH; length < MAX_LENGTH; length <<=1 ) - do_test_for_ddt( ddt, ddt, length ); + do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH ); MPI_Type_free( &ddt ); } @@ -424,8 +532,7 @@ int main( int argc, char* argv[] ) printf( "\noptimized constant indexed gap\n\n" ); ddt = create_optimized_indexed_constant_gap_ddt( 80, 100, 1 ); MPI_DDT_DUMP( ddt ); - for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 ) - do_test_for_ddt( ddt, ddt, length ); + do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH ); MPI_Type_free( &ddt ); } @@ -433,8 +540,7 @@ int main( int argc, char* argv[] ) printf( "\nstruct constant gap resized\n\n" ); ddt = create_struct_constant_gap_resized_ddt( 0 /* unused */, 0 /* unused */, 0 /* unused */ ); MPI_DDT_DUMP( ddt ); - for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 ) - do_test_for_ddt( ddt, ddt, length ); + do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH ); MPI_Type_free( &ddt ); } diff --git a/test/datatype/unpack_ooo.c b/test/datatype/unpack_ooo.c index 458ef550930..58ef8a95774 100644 --- a/test/datatype/unpack_ooo.c +++ b/test/datatype/unpack_ooo.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2014 The University of Tennessee and The University + * Copyright (c) 2014-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2014 Research Organization for Information Science @@ -18,7 +18,6 @@ #include "opal/runtime/opal.h" #include "opal/datatype/opal_convertor.h" #include "opal/datatype/opal_datatype_internal.h" -// #include #include #include #include @@ -61,6 +60,18 @@ static void print_bar_pbar(struct foo_t* bar, struct pfoo_t* pbar) fprintf(stderr, "\n"); } +static void print_stack(opal_convertor_t* conv) +{ + printf("Stack pos %d [converted %" PRIsize_t "/%" PRIsize_t "]\n", + conv->stack_pos, conv->bConverted, conv->local_size); + for( uint32_t i = 0; i <= conv->stack_pos; i++ ) { + printf( "[%u] index %d, type %s count %" PRIsize_t " disp %p\n", + i, conv->pStack[i].index, opal_datatype_basicDatatypes[conv->pStack[i].type]->name, + conv->pStack[i].count, (void*)conv->pStack[i].disp); + } + printf("\n"); +} + static int testcase(ompi_datatype_t * newtype, size_t arr[10][2]) { int i, j, errors = 0; struct iovec a; @@ -104,6 +115,7 @@ static int testcase(ompi_datatype_t * newtype, size_t arr[10][2]) { max_data = a.iov_len; pos = arr[i][1]; opal_convertor_set_position(pConv, &pos); + print_stack(pConv); assert(arr[i][1] == pos); opal_convertor_unpack( pConv, &a, &iov_count, &max_data ); a.iov_base = (char*)a.iov_base - 1024; @@ -118,9 +130,10 @@ static int testcase(ompi_datatype_t * newtype, size_t arr[10][2]) { bar[j].d[1] != 0.0 || bar[j].d[2] != pbar[j].d[1]) { if(0 == errors) { - fprintf(stderr, "ERROR ! count=%d, position=%d, ptr = %p" + (void)opal_datatype_dump(&newtype->super); + fprintf(stderr, "ERROR ! position=%d/%d, ptr = %p" " got (%d,%d,%d,%g,%g,%g) expected (%d,%d,%d,%g,%g,%g)\n", - N, j, (void*)&bar[j], + j, N, (void*)&bar[j], bar[j].i[0], bar[j].i[1], bar[j].i[2], From 012a00480616cfd30c91de50635c0718d5cde72d Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 20 May 2019 11:43:29 -0400 Subject: [PATCH 52/94] Clean and sync the pack and unpack functions. - optimize handling of contiguous with gaps datatypes. 
- fixes a performance issue for all datatypes with a count of 1. - optimize the pack/unpack of contiguous with gaps datatype. - optimize the case of blocklen == 1 Signed-off-by: George Bosilca --- opal/datatype/opal_convertor_raw.c | 6 +- opal/datatype/opal_datatype_copy.h | 32 ++-- opal/datatype/opal_datatype_module.c | 1 - opal/datatype/opal_datatype_pack.c | 218 ++++++++++--------------- opal/datatype/opal_datatype_pack.h | 108 ++++++------ opal/datatype/opal_datatype_position.c | 60 ++++--- opal/datatype/opal_datatype_unpack.c | 128 +++++++-------- opal/datatype/opal_datatype_unpack.h | 112 +++++++------ 8 files changed, 314 insertions(+), 351 deletions(-) diff --git a/opal/datatype/opal_convertor_raw.c b/opal/datatype/opal_convertor_raw.c index df2340122a9..893792583f9 100644 --- a/opal/datatype/opal_convertor_raw.c +++ b/opal/datatype/opal_convertor_raw.c @@ -31,8 +31,8 @@ #endif /* OPAL_ENABLE_DEBUG */ /* Take a new iovec (base + len) and try to merge it with what we already - * have. If we succeed return 0 and move forward, if not save it into a new - * iovec location. If we need to go to a new position and we reach the end + * have. If we succeed return 0 and move forward, otherwise save it into a new + * iovec location. If we need to advance position and we reach the end * of the iovec array, return 1 to signal we did not saved the last iovec. 
*/ static inline int @@ -46,7 +46,7 @@ opal_convertor_merge_iov( struct iovec* iov, uint32_t* iov_count, return 0; } /* cannot merge, move to the next position */ *idx = *idx + 1; - if( *idx == *iov_count ) return 1; /* do not overwrite outside the iove array boundaries */ + if( *idx == *iov_count ) return 1; /* do not overwrite outside the iovec array boundaries */ } iov[*idx].iov_base = base; iov[*idx].iov_len = len; diff --git a/opal/datatype/opal_datatype_copy.h b/opal/datatype/opal_datatype_copy.h index 40f119a684d..11058012e1e 100644 --- a/opal/datatype/opal_datatype_copy.h +++ b/opal/datatype/opal_datatype_copy.h @@ -51,11 +51,9 @@ static inline void _predefined_data( const dt_elem_desc_t* ELEM, const ddt_elem_desc_t* _elem = &((ELEM)->elem); unsigned char* _source = (SOURCE) + _elem->disp; unsigned char* _destination = (DESTINATION) + _elem->disp; - size_t total_count = _elem->count * _elem->blocklen; - size_t do_now, do_now_bytes; + size_t do_now = _elem->count, do_now_bytes; - assert( (COUNT) == total_count); - assert( total_count <= ((*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size) ); + assert( (COUNT) == (do_now * _elem->blocklen)); /* We don't a prologue and epilogue here as we are __always__ working * with full copies of the data description. @@ -64,21 +62,19 @@ static inline void _predefined_data( const dt_elem_desc_t* ELEM, /** * Compute how many full blocklen we need to do and do them. 
*/ - do_now = _elem->count; - if( 0 != do_now ) { - do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size; - for(size_t _i = 0; _i < do_now; _i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( _source, do_now_bytes, (SOURCE_BASE), - (DATATYPE), (TOTAL_COUNT) ); - DO_DEBUG( opal_output( 0, "copy %s( %p, %p, %" PRIsize_t " ) => space %" PRIsize_t "\n", - STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, do_now_bytes, *(SPACE) ); ); - MEM_OP( _destination, _source, do_now_bytes ); - _destination += _elem->extent; - _source += _elem->extent; - *(SPACE) -= do_now_bytes; - } - (COUNT) -= total_count; + do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size; + assert( (do_now * do_now_bytes) <= (*SPACE) ); + + for(size_t _i = 0; _i < do_now; _i++ ) { + OPAL_DATATYPE_SAFEGUARD_POINTER( _source, do_now_bytes, (SOURCE_BASE), + (DATATYPE), (TOTAL_COUNT) ); + DO_DEBUG( opal_output( 0, "copy %s( %p, %p, %" PRIsize_t " ) => space %" PRIsize_t "\n", + STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, do_now_bytes, *(SPACE) - _i * do_now_bytes ); ); + MEM_OP( _destination, _source, do_now_bytes ); + _destination += _elem->extent; + _source += _elem->extent; } + *(SPACE) -= (do_now_bytes * do_now); } static inline void _contiguous_loop( const dt_elem_desc_t* ELEM, diff --git a/opal/datatype/opal_datatype_module.c b/opal/datatype/opal_datatype_module.c index 7976392b63e..d4415b21ef1 100644 --- a/opal/datatype/opal_datatype_module.c +++ b/opal/datatype/opal_datatype_module.c @@ -224,7 +224,6 @@ int32_t opal_datatype_init( void ) OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS; datatype->desc.desc[0].elem.common.type = i; - /* datatype->desc.desc[0].elem.blocklen XXX not set at the moment, it will be needed later */ datatype->desc.desc[0].elem.count = 1; datatype->desc.desc[0].elem.blocklen = 1; datatype->desc.desc[0].elem.disp = 0; diff --git a/opal/datatype/opal_datatype_pack.c 
b/opal/datatype/opal_datatype_pack.c index 55889fcaa55..cf69f6ada22 100644 --- a/opal/datatype/opal_datatype_pack.c +++ b/opal/datatype/opal_datatype_pack.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -53,8 +53,6 @@ #endif /* defined(CHECKSUM) */ -#define IOVEC_MEM_LIMIT 8192 - /* the contig versions does not use the stack. They can easily retrieve * the status with just the informations from pConvertor->bConverted. */ @@ -68,9 +66,8 @@ opal_pack_homogeneous_contig_function( opal_convertor_t* pConv, unsigned char *source_base = NULL; uint32_t iov_count; size_t length = pConv->local_size - pConv->bConverted, initial_amount = pConv->bConverted; - ptrdiff_t initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp; - source_base = (pConv->pBaseBuf + initial_displ + pStack[0].disp + pStack[1].disp); + source_base = (pConv->pBaseBuf + pConv->pDesc->true_lb + pStack[0].disp + pStack[1].disp); /* There are some optimizations that can be done if the upper level * does not provide a buffer. 
@@ -111,155 +108,116 @@ opal_pack_homogeneous_contig_with_gaps_function( opal_convertor_t* pConv, uint32_t* out_size, size_t* max_data ) { + size_t remaining, length, initial_bytes_converted = pConv->bConverted; const opal_datatype_t* pData = pConv->pDesc; dt_stack_t* stack = pConv->pStack; + ptrdiff_t extent = pData->ub - pData->lb; unsigned char *user_memory, *packed_buffer; - uint32_t iov_count, index; + uint32_t idx; size_t i; - size_t bConverted, remaining, length, initial_bytes_converted = pConv->bConverted; - ptrdiff_t extent= pData->ub - pData->lb; - ptrdiff_t initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp; + /* The memory layout is contiguous with gaps in the begining and at the end. The datatype true_lb + * is the initial displacement, the size the length of the contiguous area and the extent represent + * how much we should jump between elements. + */ assert( (pData->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && ((ptrdiff_t)pData->size != extent) ); DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( pBaseBuf %p, iov_count %d )\n", (void*)pConv->pBaseBuf, *out_size ); ); if( stack[1].type != opal_datatype_uint1.id ) { stack[1].count *= opal_datatype_basicDatatypes[stack[1].type]->size; - stack[1].type = opal_datatype_uint1.id; + stack[1].type = opal_datatype_uint1.id; + } + /* We can provide directly the pointers in the user buffers (like the convertor_raw) */ + if( NULL == iov[0].iov_base ) { + user_memory = pConv->pBaseBuf + pData->true_lb; + + for( idx = 0; (idx < (*out_size)) && stack[0].count; idx++ ) { + iov[idx].iov_base = user_memory + stack[0].disp + stack[1].disp; + iov[idx].iov_len = stack[1].count; + COMPUTE_CSUM( iov[idx].iov_base, iov[idx].iov_len, pConv ); + + pConv->bConverted += stack[1].count; + + stack[0].disp += extent; + stack[0].count--; + stack[1].disp = 0; + stack[1].count = pData->size; /* we might need this to update the partial + * length for the first iteration */ + } + goto 
update_status_and_return; } - /* There are some optimizations that can be done if the upper level - * does not provide a buffer. - */ - for( iov_count = 0; iov_count < (*out_size); iov_count++ ) { + for( idx = 0; idx < (*out_size); idx++ ) { /* Limit the amount of packed data to the data left over on this convertor */ remaining = pConv->local_size - pConv->bConverted; if( 0 == remaining ) break; /* we're done this time */ - if( remaining > iov[iov_count].iov_len ) - remaining = iov[iov_count].iov_len; - packed_buffer = (unsigned char *)iov[iov_count].iov_base; - bConverted = remaining; /* how much will get unpacked this time */ - user_memory = pConv->pBaseBuf + initial_displ + stack[0].disp + stack[1].disp; - i = pConv->count - stack[0].count; /* how many we already packed */ - assert(i == (pConv->bConverted / pData->size)); - - if( packed_buffer == NULL ) { - /* special case for small data. We avoid allocating memory if we - * can fill the iovec directly with the address of the remaining - * data. - */ - if( stack->count < (size_t)((*out_size) - iov_count) ) { - stack[1].count = pData->size - (pConv->bConverted % pData->size); - for( index = iov_count; i < pConv->count; i++, index++ ) { - iov[index].iov_base = (IOVBASE_TYPE *) user_memory; - iov[index].iov_len = stack[1].count; - stack[0].disp += extent; - pConv->bConverted += stack[1].count; - stack[1].disp = 0; /* reset it for the next round */ - stack[1].count = pData->size; - user_memory = pConv->pBaseBuf + initial_displ + stack[0].disp; - COMPUTE_CSUM( iov[index].iov_base, iov[index].iov_len, pConv ); - } - *out_size = iov_count + index; - *max_data = (pConv->bConverted - initial_bytes_converted); - pConv->flags |= CONVERTOR_COMPLETED; - return 1; /* we're done */ - } - /* now special case for big contiguous data with gaps around */ - if( pData->size >= IOVEC_MEM_LIMIT ) { - /* as we dont have to copy any data, we can simply fill the iovecs - * with data from the user data description. 
- */ - for( index = iov_count; (i < pConv->count) && (index < (*out_size)); - i++, index++ ) { - if( remaining < pData->size ) { - iov[index].iov_base = (IOVBASE_TYPE *) user_memory; - iov[index].iov_len = remaining; - remaining = 0; - COMPUTE_CSUM( iov[index].iov_base, iov[index].iov_len, pConv ); - break; - } else { - iov[index].iov_base = (IOVBASE_TYPE *) user_memory; - iov[index].iov_len = pData->size; - user_memory += extent; - COMPUTE_CSUM( iov[index].iov_base, (size_t)iov[index].iov_len, pConv ); - } - remaining -= iov[index].iov_len; - pConv->bConverted += iov[index].iov_len; - } - *out_size = index; - *max_data = (pConv->bConverted - initial_bytes_converted); - if( pConv->bConverted == pConv->local_size ) { - pConv->flags |= CONVERTOR_COMPLETED; - return 1; - } - return 0; + if( remaining > iov[idx].iov_len ) + remaining = iov[idx].iov_len; + packed_buffer = (unsigned char *)iov[idx].iov_base; + pConv->bConverted += remaining; + user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp + stack[1].disp; + + DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( user_memory %p, packed_buffer %p length %" PRIsize_t "\n", + (void*)user_memory, (void*)packed_buffer, remaining ); ); + + length = (0 == pConv->stack_pos ? 
0 : stack[1].count); /* left over from the last pack */ + /* data left from last round and enough space in the buffer */ + if( (pData->size != length) && (length <= remaining)) { + /* copy the partial left-over from the previous round */ + OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, length, pConv->pBaseBuf, + pData, pConv->count ); + DO_DEBUG( opal_output( 0, "pack dest %p src %p length %" PRIsize_t " [prologue]\n", + (void*)user_memory, (void*)packed_buffer, length ); ); + MEMCPY_CSUM( packed_buffer, user_memory, length, pConv ); + packed_buffer += length; + remaining -= length; + stack[1].count -= length; + stack[1].disp += length; /* just in case, we overwrite this below */ + if( 0 == stack[1].count) { /* one completed element */ + stack[0].count--; + stack[0].disp += extent; + if( 0 == stack[0].count ) /* not yet done */ + break; + stack[1].count = pData->size; + stack[1].disp = 0; } + user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp + stack[1].disp; } - { - DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( user_memory %p, packed_buffer %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); ); - - length = (0 == pConv->stack_pos ? 0 : stack[1].count); /* left over from the last pack */ - /* data left from last round and enough space in the buffer */ - if( (0 != length) && (length <= remaining)) { - /* copy the partial left-over from the previous round */ - OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, length, pConv->pBaseBuf, - pData, pConv->count ); - DO_DEBUG( opal_output( 0, "2. 
pack dest %p src %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)length ); ); - MEMCPY_CSUM( packed_buffer, user_memory, length, pConv ); - packed_buffer += length; - user_memory += (extent - pData->size + length); - remaining -= length; - stack[1].count -= length; - if( 0 == stack[1].count) { /* one completed element */ - stack[0].count--; - stack[0].disp += extent; - if( 0 != stack[0].count ) { /* not yet done */ - stack[1].count = pData->size; - stack[1].disp = 0; - } - } - } - for( i = 0; pData->size <= remaining; i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, pData->size, pConv->pBaseBuf, - pData, pConv->count ); - DO_DEBUG( opal_output( 0, "3. pack dest %p src %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)pData->size ); ); - MEMCPY_CSUM( packed_buffer, user_memory, pData->size, pConv ); - packed_buffer += pData->size; - user_memory += extent; - remaining -= pData->size; - } - stack[0].count -= i; /* the filled up and the entire types */ - stack[0].disp += (i * extent); - stack[1].disp += remaining; - /* Copy the last bits */ - if( 0 != remaining ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf, - pData, pConv->count ); - DO_DEBUG( opal_output( 0, "4. 
pack dest %p src %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); ); - MEMCPY_CSUM( packed_buffer, user_memory, remaining, pConv ); - user_memory += remaining; - stack[1].count -= remaining; - } + for( i = 0; pData->size <= remaining; i++ ) { + OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, pData->size, pConv->pBaseBuf, + pData, pConv->count ); + DO_DEBUG( opal_output( 0, "pack dest %p src %p length %" PRIsize_t " [%" PRIsize_t "/%" PRIsize_t "\n", + (void*)user_memory, (void*)packed_buffer, pData->size, remaining, iov[idx].iov_len ); ); + MEMCPY_CSUM( packed_buffer, user_memory, pData->size, pConv ); + packed_buffer += pData->size; + user_memory += extent; + remaining -= pData->size; + } + stack[0].count -= i; /* the entire datatype copied above */ + stack[0].disp += (i * extent); + + /* Copy the last bits */ + if( 0 != remaining ) { + OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf, + pData, pConv->count ); + DO_DEBUG( opal_output( 0, "4. pack dest %p src %p length %" PRIsize_t "\n", + (void*)user_memory, (void*)packed_buffer, remaining ); ); + MEMCPY_CSUM( packed_buffer, user_memory, remaining, pConv ); + stack[1].count -= remaining; + stack[1].disp += remaining; /* keep the += in case we are copying less that the datatype size */ if( 0 == stack[1].count ) { /* prepare for the next element */ stack[1].count = pData->size; stack[1].disp = 0; } } - pConv->bConverted += bConverted; } - *out_size = iov_count; - *max_data = (pConv->bConverted - initial_bytes_converted); - if( pConv->bConverted == pConv->local_size ) { - pConv->flags |= CONVERTOR_COMPLETED; - return 1; - } - return 0; + + update_status_and_return: + *out_size = idx; + *max_data = pConv->bConverted - initial_bytes_converted; + if( pConv->bConverted == pConv->local_size ) pConv->flags |= CONVERTOR_COMPLETED; + return !!(pConv->flags & CONVERTOR_COMPLETED); /* done or not */ } /* The pack/unpack functions need a cleanup. 
I have to create a proper interface to access diff --git a/opal/datatype/opal_datatype_pack.h b/opal/datatype/opal_datatype_pack.h index 66259f8b66b..514f8bd7b02 100644 --- a/opal/datatype/opal_datatype_pack.h +++ b/opal/datatype/opal_datatype_pack.h @@ -35,82 +35,90 @@ pack_predefined_data( opal_convertor_t* CONVERTOR, size_t* SPACE ) { const ddt_elem_desc_t* _elem = &((ELEM)->elem); - size_t total_count = _elem->count * _elem->blocklen; size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size; size_t do_now, do_now_bytes; + size_t blocklen_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size; unsigned char* _memory = (*memory) + _elem->disp; + unsigned char* _packed = *packed; assert( *(COUNT) <= _elem->count * _elem->blocklen); if( cando_count > *(COUNT) ) cando_count = *(COUNT); - /** - * First check if we already did something on this element ? - */ - do_now = (total_count - *(COUNT)); /* done elements */ - if( 0 != do_now ) { - do_now = do_now % _elem->blocklen; /* partial blocklen? */ - - if( 0 != do_now ) { - size_t left_in_block = _elem->blocklen - do_now; /* left in the current blocklen */ - do_now = (left_in_block > cando_count ) ? cando_count : left_in_block; - do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; - - OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "pack 1. 
memcpy( %p, %p, %lu ) => space %lu [prolog]\n", - (void*)*(packed), (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); - MEMCPY_CSUM( *(packed), _memory, do_now_bytes, (CONVERTOR) ); - _memory = (*memory) + _elem->disp + (ptrdiff_t)do_now_bytes; - /* compensate if we just completed a blocklen */ - if( do_now == left_in_block ) - _memory += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size); - *(packed) += do_now_bytes; - *(SPACE) -= do_now_bytes; - *(COUNT) -= do_now; - cando_count -= do_now; + if( 1 == _elem->blocklen ) { /* Do as many full blocklen as possible */ + *(COUNT) -= cando_count; + for(; cando_count > 0; cando_count--) { + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n", + (void*)_packed, (void*)_memory, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); + MEMCPY_CSUM( _packed, _memory, blocklen_bytes, (CONVERTOR) ); + _packed += blocklen_bytes; + _memory += _elem->extent; } + goto update_and_return; } + blocklen_bytes *= _elem->blocklen; /** - * Compute how many full blocklen we need to do and do them. + * First check if we already did something on this element ? The COUNT is the number + * of remaining predefined types in the current elem, not how many predefined types + * should be manipulated in the current call (this number is instead reflected on the + * SPACE). */ - do_now = cando_count / _elem->blocklen; + do_now = *(COUNT) % _elem->blocklen; /* any partial elements ? */ + /* premptively update the number of COUNT we will return. 
*/ + *(COUNT) -= cando_count; if( 0 != do_now ) { - do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size; - for(size_t _i = 0; _i < do_now; _i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n", - (void*)*(packed), (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)*(SPACE) ); ); - MEMCPY_CSUM( *(packed), _memory, do_now_bytes, (CONVERTOR) ); - *(packed) += do_now_bytes; - _memory += _elem->extent; - *(SPACE) -= do_now_bytes; - *(COUNT) -= _elem->blocklen; - cando_count -= _elem->blocklen; - } + size_t left_in_block = do_now; /* left in the current blocklen */ + do_now = (do_now > cando_count ) ? cando_count : do_now; + do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; + + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "pack 1. memcpy( %p, %p, %lu ) => space %lu [prolog]\n", + _packed, (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); + MEMCPY_CSUM( _packed, _memory, do_now_bytes, (CONVERTOR) ); + _memory += (ptrdiff_t)do_now_bytes; + /* compensate if we just completed a blocklen */ + if( do_now == left_in_block ) + _memory += _elem->extent - blocklen_bytes; + _packed += do_now_bytes; + cando_count -= do_now; + } + + /* Do as many full blocklen as possible */ + for(size_t _i = 0; _elem->blocklen <= cando_count; _i++ ) { + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "pack 2. 
memcpy( %p, %p, %lu ) => space %lu\n", + (void*)_packed, (void*)_memory, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); + MEMCPY_CSUM( _packed, _memory, blocklen_bytes, (CONVERTOR) ); + _packed += blocklen_bytes; + _memory += _elem->extent; + cando_count -= _elem->blocklen; } /** * As an epilog do anything left from the last blocklen. */ - do_now = cando_count; - if( 0 != do_now ) { - do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; + if( 0 != cando_count ) { + assert( cando_count < _elem->blocklen ); + do_now_bytes = cando_count * opal_datatype_basicDatatypes[_elem->common.type]->size; OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); + (CONVERTOR)->pDesc, (CONVERTOR)->count ); DO_DEBUG( opal_output( 0, "pack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n", - (void*)*(packed), (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); - MEMCPY_CSUM( *(packed), _memory, do_now_bytes, (CONVERTOR) ); + (void*)_packed, (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); + MEMCPY_CSUM( _packed, _memory, do_now_bytes, (CONVERTOR) ); _memory += do_now_bytes; - *(packed) += do_now_bytes; - *(SPACE) -= do_now_bytes; - *(COUNT) -= do_now; + _packed += do_now_bytes; } + update_and_return: *(memory) = _memory - _elem->disp; + *(SPACE) -= (_packed - *packed); + *(packed) = _packed; } static inline void pack_contiguous_loop( opal_convertor_t* CONVERTOR, diff --git a/opal/datatype/opal_datatype_position.c b/opal/datatype/opal_datatype_position.c index 381a31086d6..f8137c7e0cb 100644 --- a/opal/datatype/opal_datatype_position.c +++ b/opal/datatype/opal_datatype_position.c @@ -49,10 +49,24 @@ * - the DT_CONTIGUOUS flag for the type OPAL_DATATYPE_END_LOOP is meaningless. 
*/ +static inline void +position_single_block(opal_convertor_t* CONVERTOR, + unsigned char** mem, ptrdiff_t mem_update, + size_t* space, size_t space_update, + size_t* cnt, size_t cnt_update) +{ + OPAL_DATATYPE_SAFEGUARD_POINTER( *mem, mem_update, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "position( %p, %lu ) => space %lu [prolog]\n", + (void*)*mem, (unsigned long)space_update, (unsigned long)(*space) ); ); + *mem += mem_update; + *space -= space_update; + *cnt -= cnt_update; +} + /** - * Advance the current position in the convertor based using the - * current element and a left-over counter. Update the head pointer - * and the leftover byte space. + * Advance the convertors' position according. Update the pointer and the remaining space + * accordingly. */ static inline void position_predefined_data( opal_convertor_t* CONVERTOR, @@ -64,7 +78,7 @@ position_predefined_data( opal_convertor_t* CONVERTOR, const ddt_elem_desc_t* _elem = &((ELEM)->elem); size_t total_count = _elem->count * _elem->blocklen; size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size; - size_t do_now, do_now_bytes; + size_t do_now, do_now_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size; unsigned char* _memory = (*POINTER) + _elem->disp; assert( *(COUNT) <= _elem->count * _elem->blocklen); @@ -72,6 +86,15 @@ position_predefined_data( opal_convertor_t* CONVERTOR, if( cando_count > *(COUNT) ) cando_count = *(COUNT); + if( 1 == _elem->blocklen ) { + DO_DEBUG( opal_output( 0, "position( %p, %" PRIsize_t " ) x (count %" PRIsize_t ", extent %ld) => space %lu [prolog]\n", + (void*)_memory, (unsigned long)do_now_bytes, cando_count, _elem->extent, (unsigned long)(*SPACE) ); ); + _memory += cando_count * _elem->extent; + *SPACE -= cando_count * do_now_bytes; + *COUNT -= cando_count; + goto update_and_return; + } + /** * First check if we already did something on this element ? 
*/ @@ -84,16 +107,12 @@ position_predefined_data( opal_convertor_t* CONVERTOR, do_now = (left_in_block > cando_count ) ? cando_count : left_in_block; do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; - OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "position( %p, %lu ) => space %lu [prolog]\n", - (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); - _memory = *(POINTER) + _elem->disp + (ptrdiff_t)do_now_bytes; + position_single_block( CONVERTOR, &_memory, do_now_bytes, + SPACE, do_now_bytes, COUNT, do_now ); + /* compensate if we just completed a blocklen */ if( do_now == left_in_block ) _memory += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size); - *(SPACE) -= do_now_bytes; - *(COUNT) -= do_now; cando_count -= do_now; } } @@ -105,13 +124,8 @@ position_predefined_data( opal_convertor_t* CONVERTOR, if( 0 != do_now ) { do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size; for(size_t _i = 0; _i < do_now; _i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "position( %p, %lu ) => space %lu\n", - (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)*(SPACE) ); ); - _memory += _elem->extent; - *(SPACE) -= do_now_bytes; - *(COUNT) -= _elem->blocklen; + position_single_block( CONVERTOR, &_memory, _elem->extent, + SPACE, do_now_bytes, COUNT, _elem->blocklen ); cando_count -= _elem->blocklen; } } @@ -122,15 +136,11 @@ position_predefined_data( opal_convertor_t* CONVERTOR, do_now = cando_count; if( 0 != do_now ) { do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; - OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( 
opal_output( 0, "position( %p, %lu ) => space %lu [epilog]\n", - (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); - _memory += do_now_bytes; - *(SPACE) -= do_now_bytes; - *(COUNT) -= do_now; + position_single_block( CONVERTOR, &_memory, do_now_bytes, + SPACE, do_now_bytes, COUNT, do_now ); } + update_and_return: *(POINTER) = _memory - _elem->disp; } diff --git a/opal/datatype/opal_datatype_unpack.c b/opal/datatype/opal_datatype_unpack.c index 3edb9161923..ac35a03c267 100644 --- a/opal/datatype/opal_datatype_unpack.c +++ b/opal/datatype/opal_datatype_unpack.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2017 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -70,98 +70,82 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv, { const opal_datatype_t *pData = pConv->pDesc; unsigned char *user_memory, *packed_buffer; - uint32_t iov_count, i; - size_t bConverted, remaining, length, initial_bytes_converted = pConv->bConverted; + uint32_t iov_idx, i; + size_t remaining, initial_bytes_converted = pConv->bConverted; dt_stack_t* stack = pConv->pStack; ptrdiff_t extent = pData->ub - pData->lb; - ptrdiff_t initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp; - DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( pBaseBuf %p, iov_count %d )\n", + DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( pBaseBuf %p, iov count %d )\n", (void*)pConv->pBaseBuf, *out_size ); ); if( stack[1].type != opal_datatype_uint1.id ) { stack[1].count *= opal_datatype_basicDatatypes[stack[1].type]->size; stack[1].type = opal_datatype_uint1.id; } - for( iov_count = 0; iov_count < 
(*out_size); iov_count++ ) { - remaining = pConv->local_size - pConv->bConverted; - if( 0 == remaining ) break; /* we're done this time */ - if( remaining > iov[iov_count].iov_len ) - remaining = iov[iov_count].iov_len; - packed_buffer = (unsigned char*)iov[iov_count].iov_base; - bConverted = remaining; /* how much will get unpacked this time */ - user_memory = pConv->pBaseBuf + initial_displ; - - if( (ptrdiff_t)pData->size == extent ) { - user_memory += pConv->bConverted; - DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( user_memory %p, packed_buffer %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); ); + + if( (ptrdiff_t)pData->size == extent ) { + for( iov_idx = 0; iov_idx < (*out_size); iov_idx++ ) { + remaining = pConv->local_size - pConv->bConverted; + if( 0 == remaining ) break; /* we're done this time */ + if( remaining > iov[iov_idx].iov_len ) + remaining = iov[iov_idx].iov_len; + + packed_buffer = (unsigned char*)iov[iov_idx].iov_base; + user_memory = pConv->pBaseBuf + pData->true_lb + pConv->bConverted; /* contiguous data or basic datatype with count */ OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf, pData, pConv->count ); - DO_DEBUG( opal_output( 0, "1. 
unpack contig dest %p src %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); ); + DO_DEBUG( opal_output( 0, "unpack contig [%d] dest %p src %p length %" PRIsize_t "\n", + iov_idx, (void*)user_memory, (void*)packed_buffer, remaining ); ); MEMCPY_CSUM( user_memory, packed_buffer, remaining, pConv ); - } else { - user_memory += stack[0].disp + stack[1].disp; + pConv->bConverted += remaining; /* how much will get unpacked this time */ + } + } else { + for( iov_idx = 0; iov_idx < (*out_size); iov_idx++ ) { + remaining = pConv->local_size - pConv->bConverted; + if( 0 == remaining ) break; /* we're done this time */ + if( remaining > iov[iov_idx].iov_len ) + remaining = iov[iov_idx].iov_len; + + packed_buffer = (unsigned char*)iov[iov_idx].iov_base; + user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp + stack[1].disp; + pConv->bConverted += remaining; /* how much will get unpacked this time */ + + for( i = 0; stack[1].count <= remaining; i++ ) { /* partial or full data */ + OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, stack[1].count, pConv->pBaseBuf, + pData, pConv->count ); + DO_DEBUG( opal_output( 0, "unpack gaps [%d] dest %p src %p length %" PRIsize_t " [%d]\n", + iov_idx, (void*)user_memory, (void*)packed_buffer, stack[1].count, i ); ); + MEMCPY_CSUM( user_memory, packed_buffer, stack[1].count, pConv ); - DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( user_memory %p, packed_buffer %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); ); + packed_buffer += stack[1].count; + remaining -= stack[1].count; - length = (0 == pConv->stack_pos ? 0 : stack[1].count); /* left over from the last unpack */ - /* complete the last copy */ - if( (0 != length) && (length <= remaining) ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, length, pConv->pBaseBuf, - pData, pConv->count ); - DO_DEBUG( opal_output( 0, "2. 
unpack dest %p src %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)length ); ); - MEMCPY_CSUM( user_memory, packed_buffer, length, pConv ); - packed_buffer += length; - user_memory += (extent - (pData->size - length)); - remaining -= length; - stack[1].count -= length; - if( 0 == stack[1].count) { /* one completed element */ - stack[0].count--; - stack[0].disp += extent; - if( 0 != stack[0].count ) { /* not yet done */ - stack[1].count = pData->size; - stack[1].disp = 0; - } - } - } - for( i = 0; pData->size <= remaining; i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, pData->size, pConv->pBaseBuf, - pData, pConv->count ); - DO_DEBUG( opal_output( 0, "3. unpack dest %p src %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)pData->size ); ); - MEMCPY_CSUM( user_memory, packed_buffer, pData->size, pConv ); - packed_buffer += pData->size; - user_memory += extent; - remaining -= pData->size; + stack[0].count--; + stack[0].disp += extent; + stack[1].count = pData->size; + stack[1].disp = 0; + + user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp; } - stack[0].count -= i; - stack[0].disp += (i * extent); - stack[1].disp += remaining; - /* copy the last bits */ + + /* Copy the last bits */ if( 0 != remaining ) { OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf, pData, pConv->count ); - DO_DEBUG( opal_output( 0, "4. 
unpack dest %p src %p length %lu\n", - (void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); ); + DO_DEBUG( opal_output( 0, "unpack gaps [%d] dest %p src %p length %" PRIsize_t " [epilog]\n", + iov_idx, (void*)user_memory, (void*)packed_buffer, remaining ); ); MEMCPY_CSUM( user_memory, packed_buffer, remaining, pConv ); - user_memory += remaining; stack[1].count -= remaining; + stack[1].disp += remaining; /* keep the += in case we are copying less that the datatype size */ + assert( stack[1].count ); } } - pConv->bConverted += bConverted; } - *out_size = iov_count; /* we only reach this line after the for loop succesfully complete */ - *max_data = (pConv->bConverted - initial_bytes_converted); - if( pConv->bConverted == pConv->local_size ) { - pConv->flags |= CONVERTOR_COMPLETED; - return 1; - } - return 0; + *out_size = iov_idx; /* we only reach this line after the for loop succesfully complete */ + *max_data = pConv->bConverted - initial_bytes_converted; + if( pConv->bConverted == pConv->local_size ) pConv->flags |= CONVERTOR_COMPLETED; + return !!(pConv->flags & CONVERTOR_COMPLETED); /* done or not */ } /** @@ -179,7 +163,7 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv, static inline void opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pElem, unsigned char* partial_data, - ptrdiff_t start_position, ptrdiff_t length, + ptrdiff_t start_position, size_t length, unsigned char** user_buffer ) { char unused_byte = 0x7F, saved_data[16]; @@ -195,7 +179,7 @@ opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pEle /* Find a byte that is not used in the partial buffer */ find_unused_byte: - for(ptrdiff_t i = 0; i < length; i++ ) { + for(size_t i = 0; i < length; i++ ) { if( unused_byte == partial_data[i] ) { unused_byte--; goto find_unused_byte; @@ -306,7 +290,7 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor, COMPUTE_CSUM( iov_ptr, missing_length, pConvertor ); 
opal_unpack_partial_datatype( pConvertor, pElem, iov_ptr, - pConvertor->partial_length, element_length - pConvertor->partial_length, + pConvertor->partial_length, (size_t)(element_length - pConvertor->partial_length), &conv_ptr ); --count_desc; if( 0 == count_desc ) { diff --git a/opal/datatype/opal_datatype_unpack.h b/opal/datatype/opal_datatype_unpack.h index f51a609294d..5a3679bc37f 100644 --- a/opal/datatype/opal_datatype_unpack.h +++ b/opal/datatype/opal_datatype_unpack.h @@ -35,82 +35,90 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR, size_t* SPACE ) { const ddt_elem_desc_t* _elem = &((ELEM)->elem); - size_t total_count = _elem->count * _elem->blocklen; size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size; size_t do_now, do_now_bytes; + size_t blocklen_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size; unsigned char* _memory = (*memory) + _elem->disp; + unsigned char* _packed = *packed; - assert( *(COUNT) <= _elem->count * _elem->blocklen); + assert( *(COUNT) <= (_elem->count * _elem->blocklen)); if( cando_count > *(COUNT) ) cando_count = *(COUNT); - /** - * First check if we already did something on this element ? - */ - do_now = (total_count - *(COUNT)); /* done elements */ - if( 0 != do_now ) { - do_now = do_now % _elem->blocklen; /* partial blocklen? */ - - if( 0 != do_now ) { - size_t left_in_block = _elem->blocklen - do_now; /* left in the current blocklen */ - do_now = (left_in_block > cando_count ) ? cando_count : left_in_block; - do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; - - OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "unpack 1. 
memcpy( %p, %p, %lu ) => space %lu [prolog]\n", - (void*)_memory, (void*)*(packed), (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); - MEMCPY_CSUM( _memory, *(packed), do_now_bytes, (CONVERTOR) ); - _memory = (*memory) + _elem->disp + (ptrdiff_t)do_now_bytes; - /* compensate if we just completed a blocklen */ - if( do_now == left_in_block ) - _memory += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size); - *(packed) += do_now_bytes; - *(SPACE) -= do_now_bytes; - *(COUNT) -= do_now; - cando_count -= do_now; + if( 1 == _elem->blocklen ) { /* Do as many full blocklen as possible */ + *(COUNT) -= cando_count; + for(; cando_count > 0; cando_count--) { + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "unpack 2. memcpy( %p, %p, %lu ) => space %lu\n", + (void*)_memory, (void*)_packed, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); + MEMCPY_CSUM( _memory, _packed, blocklen_bytes, (CONVERTOR) ); + _packed += blocklen_bytes; + _memory += _elem->extent; } + goto update_and_return; } + blocklen_bytes *= _elem->blocklen; /** - * Compute how many full blocklen we need to do and do them. + * First check if we already did something on this element ? The COUNT is the number + * of remaining predefined types in the current elem, not how many predefined types + * should be manipulated in the current call (this number is instead reflected on the + * SPACE). */ - do_now = cando_count / _elem->blocklen; + do_now = *(COUNT) % _elem->blocklen; /* any partial elements ? */ + /* premptively update the number of COUNT we will return. 
*/ + *(COUNT) -= cando_count; if( 0 != do_now ) { - do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size; - for(size_t _i = 0; _i < do_now; _i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n", - (void*)_memory, (void*)*(packed), (unsigned long)do_now_bytes, (unsigned long)*(SPACE) ); ); - MEMCPY_CSUM( _memory, *(packed), do_now_bytes, (CONVERTOR) ); - *(packed) += do_now_bytes; - _memory += _elem->extent; - *(SPACE) -= do_now_bytes; - *(COUNT) -= _elem->blocklen; - cando_count -= _elem->blocklen; - } + size_t left_in_block = do_now; /* left in the current blocklen */ + do_now = (do_now > cando_count ) ? cando_count : do_now; + do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; + + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "unpack 1. memcpy( %p, %p, %lu ) => space %lu [prolog]\n", + (void*)_memory, (void*)_packed, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); + MEMCPY_CSUM( _memory, _packed, do_now_bytes, (CONVERTOR) ); + _memory += (ptrdiff_t)do_now_bytes; + /* compensate if we just completed a blocklen */ + if( do_now == left_in_block ) + _memory += _elem->extent - blocklen_bytes; + _packed += do_now_bytes; + cando_count -= do_now; + } + + /* Do as many full blocklen as possible */ + for(size_t _i = 0; _elem->blocklen <= cando_count; _i++ ) { + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "unpack 2. 
memcpy( %p, %p, %lu ) => space %lu\n", + (void*)_memory, (void*)_packed, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); + MEMCPY_CSUM( _memory, _packed, blocklen_bytes, (CONVERTOR) ); + _packed += blocklen_bytes; + _memory += _elem->extent; + cando_count -= _elem->blocklen; } /** * As an epilog do anything left from the last blocklen. */ - do_now = cando_count; - if( 0 != do_now ) { - do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; + if( 0 != cando_count ) { + assert( cando_count < _elem->blocklen ); + do_now_bytes = cando_count * opal_datatype_basicDatatypes[_elem->common.type]->size; OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "pack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n", - (void*)_memory, (void*)*(packed), (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); - MEMCPY_CSUM( _memory, *(packed), do_now_bytes, (CONVERTOR) ); + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "unpack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n", + (void*)_memory, (void*)_packed, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); + MEMCPY_CSUM( _memory, _packed, do_now_bytes, (CONVERTOR) ); _memory += do_now_bytes; - *(packed) += do_now_bytes; - *(SPACE) -= do_now_bytes; - *(COUNT) -= do_now; + _packed += do_now_bytes; } + update_and_return: *(memory) = _memory - _elem->disp; + *(SPACE) -= (_packed - *packed); + *(packed) = _packed; } static inline void unpack_contiguous_loop( opal_convertor_t* CONVERTOR, From 78cc0ff89193b0ec7034b4ea26f93aefb83e7d15 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Tue, 28 May 2019 14:54:40 -0400 Subject: [PATCH 53/94] Disable checksum. 
Signed-off-by: George Bosilca --- opal/datatype/opal_convertor.c | 16 +++++++++------- opal/datatype/opal_convertor.h | 6 ++++-- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index 7a449302bff..4754723f68a 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -579,8 +579,9 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor, assert(! (convertor->flags & CONVERTOR_SEND)); OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ); - if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) { - if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) { +#if defined(CHECKSUM) + if( OPAL_UNLIKELY(convertor->flags & CONVERTOR_WITH_CHECKSUM) ) { + if( OPAL_UNLIKELY(!(convertor->flags & CONVERTOR_HOMOGENEOUS)) ) { convertor->fAdvance = opal_unpack_general_checksum; } else { if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { @@ -589,8 +590,9 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor, convertor->fAdvance = opal_generic_simple_unpack_checksum; } } - } else { - if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) { + } else +#endif /* defined(CHECKSUM) */ + if( OPAL_UNLIKELY(!(convertor->flags & CONVERTOR_HOMOGENEOUS)) ) { convertor->fAdvance = opal_unpack_general; } else { if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { @@ -599,7 +601,6 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor, convertor->fAdvance = opal_generic_simple_unpack; } } - } return OPAL_SUCCESS; } @@ -618,6 +619,7 @@ int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor, OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ); +#if defined(CHECKSUM) if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) { if( CONVERTOR_SEND_CONVERSION == (convertor->flags & (CONVERTOR_SEND_CONVERSION|CONVERTOR_HOMOGENEOUS)) ) { convertor->fAdvance = opal_pack_general_checksum; @@ -632,7 +634,8 @@ int32_t 
opal_convertor_prepare_for_send( opal_convertor_t* convertor, convertor->fAdvance = opal_generic_simple_pack_checksum; } } - } else { + } else +#endif /* defined(CHECKSUM) */ if( CONVERTOR_SEND_CONVERSION == (convertor->flags & (CONVERTOR_SEND_CONVERSION|CONVERTOR_HOMOGENEOUS)) ) { convertor->fAdvance = opal_pack_general; } else { @@ -646,7 +649,6 @@ int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor, convertor->fAdvance = opal_generic_simple_pack; } } - } return OPAL_SUCCESS; } diff --git a/opal/datatype/opal_convertor.h b/opal/datatype/opal_convertor.h index 875c111b1f1..b24d94c37b0 100644 --- a/opal/datatype/opal_convertor.h +++ b/opal/datatype/opal_convertor.h @@ -332,8 +332,10 @@ opal_convertor_set_position( opal_convertor_t* convertor, /* Remove the completed flag if it's already set */ convertor->flags &= ~CONVERTOR_COMPLETED; - if( !(convertor->flags & CONVERTOR_WITH_CHECKSUM) && - (convertor->flags & OPAL_DATATYPE_FLAG_NO_GAPS) && + if( (convertor->flags & OPAL_DATATYPE_FLAG_NO_GAPS) && +#if defined(CHECKSUM) + !(convertor->flags & CONVERTOR_WITH_CHECKSUM) && +#endif /* defined(CHECKSUM) */ (convertor->flags & (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS)) ) { /* Contiguous and no checkpoint and no homogeneous unpack */ convertor->bConverted = *position; From d5cdfe70eff1371f69edf847bc1b164bd7e05d92 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Fri, 21 Jun 2019 13:15:12 -0400 Subject: [PATCH 54/94] Optimize the position placement. Upon detecting a datatype loop representation skip the entire loop according to the remaining space.
Signed-off-by: George Bosilca --- opal/datatype/opal_datatype_position.c | 67 +++++++++----------------- 1 file changed, 23 insertions(+), 44 deletions(-) diff --git a/opal/datatype/opal_datatype_position.c b/opal/datatype/opal_datatype_position.c index f8137c7e0cb..204d670a3ef 100644 --- a/opal/datatype/opal_datatype_position.c +++ b/opal/datatype/opal_datatype_position.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2019 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -123,11 +123,18 @@ position_predefined_data( opal_convertor_t* CONVERTOR, do_now = cando_count / _elem->blocklen; if( 0 != do_now ) { do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size; +#if OPAL_ENABLE_DEBUG for(size_t _i = 0; _i < do_now; _i++ ) { position_single_block( CONVERTOR, &_memory, _elem->extent, SPACE, do_now_bytes, COUNT, _elem->blocklen ); cando_count -= _elem->blocklen; } +#else + _memory += do_now * _elem->extent; + *SPACE -= do_now * do_now_bytes; + *COUNT -= do_now * _elem->blocklen; + cando_count -= do_now * _elem->blocklen; +#endif /* OPAL_ENABLE_DEBUG */ } /** @@ -144,48 +151,16 @@ position_predefined_data( opal_convertor_t* CONVERTOR, *(POINTER) = _memory - _elem->disp; } -/** - * Advance the current position in the convertor based using the - * current contiguous loop and a left-over counter. Update the head - * pointer and the leftover byte space. 
- */ -static inline void -position_contiguous_loop( opal_convertor_t* CONVERTOR, - dt_elem_desc_t* ELEM, - size_t* COUNT, - unsigned char** POINTER, - size_t* SPACE ) -{ - ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM); - ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + (ELEM)->loop.items); - size_t _copy_loops = *(COUNT); - - if( (_copy_loops * _end_loop->size) > *(SPACE) ) - _copy_loops = *(SPACE) / _end_loop->size; - OPAL_DATATYPE_SAFEGUARD_POINTER( *(POINTER) + _end_loop->first_elem_disp, - (_copy_loops - 1) * _loop->extent + _end_loop->size, - (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); - *(POINTER) += _copy_loops * _loop->extent; - *(SPACE) -= _copy_loops * _end_loop->size; - *(COUNT) -= _copy_loops; -} - -#define POSITION_PREDEFINED_DATATYPE( CONVERTOR, ELEM, COUNT, POSITION, SPACE ) \ - position_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(POSITION), &(SPACE) ) - -#define POSITION_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, POSITION, SPACE ) \ - position_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(POSITION), &(SPACE) ) - int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, size_t* position ) { dt_stack_t* pStack; /* pointer to the position on the stack */ uint32_t pos_desc; /* actual position in the description of the derived datatype */ size_t count_desc; /* the number of items already done in the actual pos_desc */ + size_t iov_len_local; dt_elem_desc_t* description = pConvertor->use_desc->desc; dt_elem_desc_t* pElem; /* current position */ unsigned char *base_pointer = pConvertor->pBaseBuf; - size_t iov_len_local; ptrdiff_t extent = pConvertor->pDesc->ub - pConvertor->pDesc->lb; DUMP( "opal_convertor_generic_simple_position( %p, &%ld )\n", (void*)pConvertor, (long)*position ); @@ -236,21 +211,19 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, assert(pConvertor->partial_length < element_length); return 0; } - pConvertor->partial_length = 
(pConvertor->partial_length + missing_length) % element_length; - assert(pConvertor->partial_length == 0); + pConvertor->partial_length = 0; pConvertor->bConverted += missing_length; iov_len_local -= missing_length; count_desc--; } while( 1 ) { - if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ + if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the the entire datatype */ DO_DEBUG( opal_output( 0, "position end_loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %lx space %lu\n", pStack->count, pConvertor->stack_pos, pos_desc, pStack->disp, (unsigned long)iov_len_local ); ); if( --(pStack->count) == 0 ) { /* end of loop */ if( pConvertor->stack_pos == 0 ) { pConvertor->flags |= CONVERTOR_COMPLETED; - pConvertor->partial_length = 0; goto complete_loop; /* completed */ } pConvertor->stack_pos--; @@ -259,11 +232,13 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, } else { if( pStack->index == -1 ) { pStack->disp += extent; + pos_desc = 0; /* back to the first element */ } else { assert( OPAL_DATATYPE_LOOP == description[pStack->index].loop.common.type ); pStack->disp += description[pStack->index].loop.extent; + pos_desc = pStack->index; /* go back to the loop start itself to give a chance + * to move forward by entire loops */ } - pos_desc = pStack->index + 1; } base_pointer = pConvertor->pBaseBuf + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); @@ -273,9 +248,14 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, } if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) { ptrdiff_t local_disp = (ptrdiff_t)base_pointer; - if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { - POSITION_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc, - base_pointer, iov_len_local ); + ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)(pElem + pElem->loop.items); + size_t full_loops = iov_len_local / end_loop->size; + full_loops 
= count_desc <= full_loops ? count_desc : full_loops; + if( full_loops ) { + base_pointer += full_loops * pElem->loop.extent; + iov_len_local -= full_loops * end_loop->size; + count_desc -= full_loops; + if( 0 == count_desc ) { /* completed */ pos_desc += pElem->loop.items + 1; goto update_loop_description; @@ -297,8 +277,7 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, } while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* now here we have a basic datatype */ - POSITION_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc, - base_pointer, iov_len_local ); + position_predefined_data( pConvertor, pElem, &count_desc, &base_pointer, &iov_len_local ); if( 0 != count_desc ) { /* completed */ pConvertor->partial_length = iov_len_local; goto complete_loop; From fad707d3b05f7e4baef3c55c1dbfaf4537b348c9 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 26 Jun 2019 12:55:44 -0400 Subject: [PATCH 55/94] Rework the datatype commit. Optimize contiguous loops by collapsing them into a single element. During datatype optimization collapse similar elements into larger blocks. Signed-off-by: George Bosilca --- opal/datatype/opal_datatype_internal.h | 12 ++++-- opal/datatype/opal_datatype_optimize.c | 60 ++++++++++++++++++-------- 2 files changed, 49 insertions(+), 23 deletions(-) diff --git a/opal/datatype/opal_datatype_internal.h b/opal/datatype/opal_datatype_internal.h index 2b2ddc0961e..1f10c9138aa 100644 --- a/opal/datatype/opal_datatype_internal.h +++ b/opal/datatype/opal_datatype_internal.h @@ -215,10 +215,8 @@ union dt_elem_desc { /** - * Create one or more elements depending on the value of _count. If the value - * is too large for the type of elem.count then use oth the elem.count and - * elem.blocklen to create it. If the number is prime then create a second - * element to account for the difference. + * Create an element entry in the description. If the element is contiguous + * collapse everything into the blocklen. 
*/ #define CREATE_ELEM(_place, _type, _flags, _blocklen, _count, _disp, _extent) \ do { \ @@ -228,6 +226,12 @@ union dt_elem_desc { (_place)->elem.count = (_count); \ (_place)->elem.extent = (_extent); \ (_place)->elem.disp = (_disp); \ + if( _extent == (ptrdiff_t)(_blocklen * opal_datatype_basicDatatypes[_type]->size) ) { \ + /* collapse it into a single large blocklen */ \ + (_place)->elem.blocklen *= _count; \ + (_place)->elem.extent *= _count; \ + (_place)->elem.count = 1; \ + } \ } while(0) /* * This array holds the descriptions desc.desc[2] of the predefined basic datatypes. diff --git a/opal/datatype/opal_datatype_optimize.c b/opal/datatype/opal_datatype_optimize.c index fbaacb592c2..336e11f0560 100644 --- a/opal/datatype/opal_datatype_optimize.c +++ b/opal/datatype/opal_datatype_optimize.c @@ -60,27 +60,27 @@ opal_datatype_optimize_short( opal_datatype_t* pData, CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC, last.blocklen, last.count, last.disp, last.extent ); pElemDesc++; nbElems++; - last.disp += last.count; last.count= 0; } CREATE_LOOP_END( pElemDesc, nbElems - pStack->index + 1, /* # of elems in this loop */ end_loop->first_elem_disp, end_loop->size, end_loop->common.flags ); - pElemDesc++; nbElems++; if( --stack_pos >= 0 ) { /* still something to do ? 
*/ ddt_loop_desc_t* pStartLoop = &(pTypeDesc->desc[pStack->index - 1].loop); - pStartLoop->items = end_loop->items; + pStartLoop->items = pElemDesc->end_loop.items; total_disp = pStack->disp; /* update the displacement position */ } + pElemDesc++; nbElems++; pStack--; /* go down one position on the stack */ pos_desc++; continue; } if( OPAL_DATATYPE_LOOP == pData->desc.desc[pos_desc].elem.common.type ) { ddt_loop_desc_t* loop = (ddt_loop_desc_t*)&(pData->desc.desc[pos_desc]); - ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items]); int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) ); if( loop->common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { + ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items]); + assert(pData->desc.desc[pos_desc + index].elem.disp == end_loop->first_elem_disp); compress.common.flags = loop->common.flags; compress.common.type = pData->desc.desc[pos_desc + index].elem.common.type; @@ -99,7 +99,12 @@ opal_datatype_optimize_short( opal_datatype_t* pData, compress.count = loop->loops; compress.extent = loop->extent; compress.disp = end_loop->first_elem_disp; - + if( compress.extent == (ptrdiff_t)(compress.blocklen * opal_datatype_basicDatatypes[compress.common.type]->size) ) { + /* The compressed element is contiguous: collapse it into a single large blocklen */ + compress.blocklen *= compress.count; + compress.extent *= compress.count; + compress.count = 1; + } /** * The current loop has been compressed and can now be treated as if it * was a data element. We can now look if it can be fused with last, @@ -161,26 +166,43 @@ opal_datatype_optimize_short( opal_datatype_t* pData, } /* are the two elements compatible: aka they have very similar values and they - * can be merged together by increasing the count. This optimizes the memory - * required for storing the datatype description. 
+ * can be merged together by increasing the count, and/or changing the extent. */ - if( ((last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) == - (current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size)) && - (current->disp == (last.disp + (ptrdiff_t)last.count * last.extent)) && - ((current->count == 1) || (last.extent == current->extent)) ) { - last.count += current->count; - /* find the lowest common denomitaor type */ + if( (last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) == + (current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size) ) { + ddt_elem_desc_t save = last; /* safekeep the type and blocklen */ if( last.common.type != current->common.type ) { last.blocklen *= opal_datatype_basicDatatypes[last.common.type]->size; last.common.type = OPAL_DATATYPE_UINT1; } - /* maximize the contiguous pieces */ - if( last.extent == (ptrdiff_t)(last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) ) { - last.blocklen *= last.count; - last.count = 1; - last.extent = last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size; + + if( 1 == last.count ) { + /* we can ignore the extent of the element with count == 1 and merge them together if their displacements match */ + if( 1 == current->count ) { + last.extent = current->disp - last.disp; + last.count++; + continue; + } + /* can we compute a matching displacement ? 
*/ + if( (last.disp + current->extent) == current->disp ) { + last.extent = current->extent; + last.count = current->count + 1; + continue; + } } - continue; /* next data */ + if( (last.extent * (ptrdiff_t)last.count + last.disp) == current->disp ) { + if( 1 == current->count ) { + last.count++; + continue; + } + if( last.extent == current->extent ) { + last.count += current->count; + continue; + } + } + last.blocklen = save.blocklen; + last.common.type = save.common.type; + /* try other optimizations */ } /* are the elements fusionable such that we can fusion the last blocklen of one with the first * blocklen of the other. From 87299e0b1c3a14b3ca70799fc0be12ef98ed7bcd Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 10 Jul 2019 00:28:29 -0400 Subject: [PATCH 56/94] Get rid of the division in the critical path. Amazing how bad instruction scheduling can have such a drastic impact on the code performance. With this change, we get a boost of at least 50% on the performance of data with a small blocklen and/or count. 
Signed-off-by: George Bosilca --- opal/datatype/opal_datatype_pack.h | 28 +++++++++++++++++++--------- opal/datatype/opal_datatype_unpack.h | 27 +++++++++++++++++++-------- 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/opal/datatype/opal_datatype_pack.h b/opal/datatype/opal_datatype_pack.h index 514f8bd7b02..4da9bd2450e 100644 --- a/opal/datatype/opal_datatype_pack.h +++ b/opal/datatype/opal_datatype_pack.h @@ -35,19 +35,24 @@ pack_predefined_data( opal_convertor_t* CONVERTOR, size_t* SPACE ) { const ddt_elem_desc_t* _elem = &((ELEM)->elem); - size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size; - size_t do_now, do_now_bytes; size_t blocklen_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size; + size_t cando_count = *(COUNT), do_now, do_now_bytes; unsigned char* _memory = (*memory) + _elem->disp; unsigned char* _packed = *packed; assert( *(COUNT) <= _elem->count * _elem->blocklen); - if( cando_count > *(COUNT) ) - cando_count = *(COUNT); + if( (blocklen_bytes * cando_count) > *(SPACE) ) + cando_count = (*SPACE) / blocklen_bytes; + do_now = *(COUNT); /* save the COUNT for later */ + /* premptively update the number of COUNT we will return. */ + *(COUNT) -= cando_count; + + if( 1 == _elem->count ) { /* Everything is contiguous, handle it as a prologue */ + goto do_epilog; + } if( 1 == _elem->blocklen ) { /* Do as many full blocklen as possible */ - *(COUNT) -= cando_count; for(; cando_count > 0; cando_count--) { OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); @@ -59,17 +64,19 @@ pack_predefined_data( opal_convertor_t* CONVERTOR, } goto update_and_return; } - blocklen_bytes *= _elem->blocklen; + blocklen_bytes *= _elem->blocklen; + if( (_elem->count * _elem->blocklen) == cando_count ) { + goto skip_prolog; + } /** * First check if we already did something on this element ? 
The COUNT is the number * of remaining predefined types in the current elem, not how many predefined types * should be manipulated in the current call (this number is instead reflected on the * SPACE). */ - do_now = *(COUNT) % _elem->blocklen; /* any partial elements ? */ - /* premptively update the number of COUNT we will return. */ - *(COUNT) -= cando_count; + do_now = do_now % _elem->blocklen; /* any partial elements ? */ + if( 0 != do_now ) { size_t left_in_block = do_now; /* left in the current blocklen */ do_now = (do_now > cando_count ) ? cando_count : do_now; @@ -88,6 +95,7 @@ pack_predefined_data( opal_convertor_t* CONVERTOR, cando_count -= do_now; } + skip_prolog: /* Do as many full blocklen as possible */ for(size_t _i = 0; _elem->blocklen <= cando_count; _i++ ) { OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, @@ -104,6 +112,8 @@ pack_predefined_data( opal_convertor_t* CONVERTOR, * As an epilog do anything left from the last blocklen. */ if( 0 != cando_count ) { + + do_epilog: assert( cando_count < _elem->blocklen ); do_now_bytes = cando_count * opal_datatype_basicDatatypes[_elem->common.type]->size; OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, diff --git a/opal/datatype/opal_datatype_unpack.h b/opal/datatype/opal_datatype_unpack.h index 5a3679bc37f..49a418ba2b3 100644 --- a/opal/datatype/opal_datatype_unpack.h +++ b/opal/datatype/opal_datatype_unpack.h @@ -35,19 +35,24 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR, size_t* SPACE ) { const ddt_elem_desc_t* _elem = &((ELEM)->elem); - size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size; - size_t do_now, do_now_bytes; size_t blocklen_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size; + size_t cando_count = (*COUNT), do_now, do_now_bytes; unsigned char* _memory = (*memory) + _elem->disp; unsigned char* _packed = *packed; assert( *(COUNT) <= (_elem->count * _elem->blocklen)); - if( 
cando_count > *(COUNT) ) - cando_count = *(COUNT); + if( (blocklen_bytes * cando_count) > *(SPACE) ) + cando_count = (*SPACE) / blocklen_bytes; + do_now = *(COUNT); /* save the COUNT for later */ + /* premptively update the number of COUNT we will return. */ + *(COUNT) -= cando_count; + + if( 1 == _elem->count ) { /* Everything is contiguous, handle it as a prologue */ + goto do_epilog; + } if( 1 == _elem->blocklen ) { /* Do as many full blocklen as possible */ - *(COUNT) -= cando_count; for(; cando_count > 0; cando_count--) { OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); @@ -59,7 +64,11 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR, } goto update_and_return; } + blocklen_bytes *= _elem->blocklen; + if( (_elem->count * _elem->blocklen) == cando_count ) { + goto skip_prolog; + } /** * First check if we already did something on this element ? The COUNT is the number @@ -67,9 +76,8 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR, * should be manipulated in the current call (this number is instead reflected on the * SPACE). */ - do_now = *(COUNT) % _elem->blocklen; /* any partial elements ? */ - /* premptively update the number of COUNT we will return. */ - *(COUNT) -= cando_count; + do_now = do_now % _elem->blocklen; /* any partial elements ? */ + if( 0 != do_now ) { size_t left_in_block = do_now; /* left in the current blocklen */ do_now = (do_now > cando_count ) ? cando_count : do_now; @@ -88,6 +96,7 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR, cando_count -= do_now; } + skip_prolog: /* Do as many full blocklen as possible */ for(size_t _i = 0; _elem->blocklen <= cando_count; _i++ ) { OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, @@ -104,6 +113,8 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR, * As an epilog do anything left from the last blocklen. 
*/ if( 0 != cando_count ) { + + do_epilog: assert( cando_count < _elem->blocklen ); do_now_bytes = cando_count * opal_datatype_basicDatatypes[_elem->common.type]->size; OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, From f78d3d52cd32846fab0cceeb624a1f51caaa9fca Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 10 Jul 2019 11:30:59 -0400 Subject: [PATCH 57/94] Optimize the pack/unpack. Start optimizing the code. This commit divides the operations in 2 parts, the first, outside the critical part, deals with partial blocks of predefined elements, and the second, inside the critical path, only deals with full blocks of elements. This reduces the number of expensive operations in the critical path and results in a decent performance increase. Signed-off-by: George Bosilca --- opal/datatype/opal_datatype_pack.c | 30 ++++-- opal/datatype/opal_datatype_pack.h | 135 ++++++++++++++++---------- opal/datatype/opal_datatype_unpack.c | 82 +++++++++------- opal/datatype/opal_datatype_unpack.h | 140 +++++++++++++++++---------- 4 files changed, 238 insertions(+), 149 deletions(-) diff --git a/opal/datatype/opal_datatype_pack.c b/opal/datatype/opal_datatype_pack.c index cf69f6ada22..c0ab6df66d8 100644 --- a/opal/datatype/opal_datatype_pack.c +++ b/opal/datatype/opal_datatype_pack.c @@ -272,18 +272,32 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor, for( iov_count = 0; iov_count < (*out_size); iov_count++ ) { iov_ptr = (unsigned char *) iov[iov_count].iov_base; iov_len_local = iov[iov_count].iov_len; - while( 1 ) { - while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { - /* now here we have a basic datatype */ - PACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc, - conv_ptr, iov_ptr, iov_len_local ); - if( 0 == count_desc ) { /* completed */ + + if( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { + if( (pElem->elem.count * pElem->elem.blocklen) != count_desc ) { + /* we have a partial (less than blocklen) 
basic datatype */ + int rc = PACK_PARTIAL_BLOCKLEN( pConvertor, pElem, count_desc, + conv_ptr, iov_ptr, iov_len_local ); + if( 0 == rc ) /* not done */ + goto complete_loop; + if( 0 == count_desc ) { conv_ptr = pConvertor->pBaseBuf + pStack->disp; pos_desc++; /* advance to the next data */ UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); - continue; } - goto complete_loop; + } + } + + while( 1 ) { + while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { + /* we have a basic datatype (working on full blocks) */ + PACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc, + conv_ptr, iov_ptr, iov_len_local ); + if( 0 != count_desc ) /* completed? */ + goto complete_loop; + conv_ptr = pConvertor->pBaseBuf + pStack->disp; + pos_desc++; /* advance to the next data */ + UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); } if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ DO_DEBUG( opal_output( 0, "pack end_loop count %" PRIsize_t " stack_pos %d" diff --git a/opal/datatype/opal_datatype_pack.h b/opal/datatype/opal_datatype_pack.h index 4da9bd2450e..1eaf2e8b9f9 100644 --- a/opal/datatype/opal_datatype_pack.h +++ b/opal/datatype/opal_datatype_pack.h @@ -26,6 +26,63 @@ CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) ) #endif +/** + * This function deals only with partial elements. The COUNT points however to the whole leftover count, + * but this function is only expected to operate on an amount less than blength, that would allow the rest + * of the pack process to handle only entire blength blocks (plus the left over). + * + * Return 1 if we are now aligned on a block, 0 otherwise. 
+ */ +static inline int +pack_partial_blocklen( opal_convertor_t* CONVERTOR, + const dt_elem_desc_t* ELEM, + size_t* COUNT, + unsigned char** memory, + unsigned char** packed, + size_t* SPACE ) +{ + const ddt_elem_desc_t* _elem = &((ELEM)->elem); + size_t do_now_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size; + size_t do_now = *(COUNT); + unsigned char* _memory = (*memory) + _elem->disp; + unsigned char* _packed = *packed; + + assert( *(COUNT) <= _elem->count * _elem->blocklen); + + /** + * First check if we already did something on this element ? The COUNT is the number + * of remaining predefined types in the current elem, not how many predefined types + * should be manipulated in the current call (this number is instead reflected on the + * SPACE). + */ + if( 0 == (do_now = (*COUNT) % _elem->blocklen) ) + return 1; + + size_t left_in_block = do_now; /* left in the current blocklen */ + + if( (do_now_bytes * do_now) > *(SPACE) ) + do_now = (*SPACE) / do_now_bytes; + + do_now_bytes *= do_now; + + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "pack memcpy( %p, %p, %lu ) => space %lu [partial]\n", + _packed, (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); + MEMCPY_CSUM( _packed, _memory, do_now_bytes, (CONVERTOR) ); + *(memory) += (ptrdiff_t)do_now_bytes; + if( do_now == left_in_block ) /* compensate if completed a blocklen */ + *(memory) += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size); + + *(COUNT) -= do_now; + *(SPACE) -= do_now_bytes; + *(packed) += do_now_bytes; + return (do_now == left_in_block); +} + +/** + * Pack entire blocks, plus a possible remainder if SPACE is constrained to less than COUNT elements. 
+ */ static inline void pack_predefined_data( opal_convertor_t* CONVERTOR, const dt_elem_desc_t* ELEM, @@ -36,27 +93,24 @@ pack_predefined_data( opal_convertor_t* CONVERTOR, { const ddt_elem_desc_t* _elem = &((ELEM)->elem); size_t blocklen_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size; - size_t cando_count = *(COUNT), do_now, do_now_bytes; + size_t cando_count = *(COUNT), do_now_bytes; unsigned char* _memory = (*memory) + _elem->disp; unsigned char* _packed = *packed; + assert( 0 == (cando_count % _elem->blocklen) ); /* no partials here */ assert( *(COUNT) <= _elem->count * _elem->blocklen); if( (blocklen_bytes * cando_count) > *(SPACE) ) cando_count = (*SPACE) / blocklen_bytes; - do_now = *(COUNT); /* save the COUNT for later */ /* premptively update the number of COUNT we will return. */ *(COUNT) -= cando_count; - if( 1 == _elem->count ) { /* Everything is contiguous, handle it as a prologue */ - goto do_epilog; - } if( 1 == _elem->blocklen ) { /* Do as many full blocklen as possible */ for(; cando_count > 0; cando_count--) { OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n", + DO_DEBUG( opal_output( 0, "pack memcpy( %p, %p, %lu ) => space %lu [blen = 1]\n", (void*)_packed, (void*)_memory, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); MEMCPY_CSUM( _packed, _memory, blocklen_bytes, (CONVERTOR) ); _packed += blocklen_bytes; @@ -65,61 +119,32 @@ pack_predefined_data( opal_convertor_t* CONVERTOR, goto update_and_return; } - blocklen_bytes *= _elem->blocklen; - if( (_elem->count * _elem->blocklen) == cando_count ) { - goto skip_prolog; - } - /** - * First check if we already did something on this element ? 
The COUNT is the number - * of remaining predefined types in the current elem, not how many predefined types - * should be manipulated in the current call (this number is instead reflected on the - * SPACE). - */ - do_now = do_now % _elem->blocklen; /* any partial elements ? */ + if( (1 < _elem->count) && (_elem->blocklen <= cando_count) ) { + blocklen_bytes *= _elem->blocklen; - if( 0 != do_now ) { - size_t left_in_block = do_now; /* left in the current blocklen */ - do_now = (do_now > cando_count ) ? cando_count : do_now; - do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; - - OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "pack 1. memcpy( %p, %p, %lu ) => space %lu [prolog]\n", - _packed, (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); - MEMCPY_CSUM( _packed, _memory, do_now_bytes, (CONVERTOR) ); - _memory += (ptrdiff_t)do_now_bytes; - /* compensate if we just completed a blocklen */ - if( do_now == left_in_block ) - _memory += _elem->extent - blocklen_bytes; - _packed += do_now_bytes; - cando_count -= do_now; - } - - skip_prolog: - /* Do as many full blocklen as possible */ - for(size_t _i = 0; _elem->blocklen <= cando_count; _i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "pack 2. 
memcpy( %p, %p, %lu ) => space %lu\n", - (void*)_packed, (void*)_memory, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); - MEMCPY_CSUM( _packed, _memory, blocklen_bytes, (CONVERTOR) ); - _packed += blocklen_bytes; - _memory += _elem->extent; - cando_count -= _elem->blocklen; + do { /* Do as many full blocklen as possible */ + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n", + (void*)_packed, (void*)_memory, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); + MEMCPY_CSUM( _packed, _memory, blocklen_bytes, (CONVERTOR) ); + _packed += blocklen_bytes; + _memory += _elem->extent; + cando_count -= _elem->blocklen; + } while (_elem->blocklen <= cando_count); } /** * As an epilog do anything left from the last blocklen. */ if( 0 != cando_count ) { - - do_epilog: - assert( cando_count < _elem->blocklen ); + assert( (cando_count < _elem->blocklen) || + ((1 == _elem->count) && (cando_count <= _elem->blocklen)) ); do_now_bytes = cando_count * opal_datatype_basicDatatypes[_elem->common.type]->size; OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); DO_DEBUG( opal_output( 0, "pack 3. 
memcpy( %p, %p, %lu ) => space %lu [epilog]\n", - (void*)_packed, (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); + (void*)_packed, (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); MEMCPY_CSUM( _packed, _memory, do_now_bytes, (CONVERTOR) ); _memory += do_now_bytes; _packed += do_now_bytes; @@ -159,7 +184,15 @@ static inline void pack_contiguous_loop( opal_convertor_t* CONVERTOR, *(COUNT) -= _copy_loops; } -#define PACK_PREDEFINED_DATATYPE( CONVERTOR, /* the convertor */ \ +#define PACK_PARTIAL_BLOCKLEN( CONVERTOR, /* the convertor */ \ + ELEM, /* the basic element to be packed */ \ + COUNT, /* the number of elements */ \ + MEMORY, /* the source pointer (char*) */ \ + PACKED, /* the destination pointer (char*) */ \ + SPACE ) /* the space in the destination buffer */ \ +pack_partial_blocklen( (CONVERTOR), (ELEM), &(COUNT), &(MEMORY), &(PACKED), &(SPACE) ) + +#define PACK_PREDEFINED_DATATYPE( CONVERTOR, /* the convertor */ \ ELEM, /* the basic element to be packed */ \ COUNT, /* the number of elements */ \ MEMORY, /* the source pointer (char*) */ \ diff --git a/opal/datatype/opal_datatype_unpack.c b/opal/datatype/opal_datatype_unpack.c index ac35a03c267..dca07796d99 100644 --- a/opal/datatype/opal_datatype_unpack.c +++ b/opal/datatype/opal_datatype_unpack.c @@ -282,6 +282,7 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor, for( iov_count = 0; iov_count < (*out_size); iov_count++ ) { iov_ptr = (unsigned char *) iov[iov_count].iov_base; iov_len_local = iov[iov_count].iov_len; + if( 0 != pConvertor->partial_length ) { size_t element_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size; size_t missing_length = element_length - pConvertor->partial_length; @@ -302,34 +303,31 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor, iov_len_local -= missing_length; pConvertor->partial_length = 0; /* nothing more inside */ } - while( 1 ) { - while( 
pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { - /* now here we have a basic datatype */ - UNPACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc, - iov_ptr, conv_ptr, iov_len_local ); - if( 0 == count_desc ) { /* completed */ + if( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { + if( (pElem->elem.count * pElem->elem.blocklen) != count_desc ) { + /* we have a partial (less than blocklen) basic datatype */ + int rc = UNPACK_PARTIAL_BLOCKLEN( pConvertor, pElem, count_desc, + iov_ptr, conv_ptr, iov_len_local ); + if( 0 == rc ) /* not done */ + goto complete_loop; + if( 0 == count_desc ) { conv_ptr = pConvertor->pBaseBuf + pStack->disp; pos_desc++; /* advance to the next data */ UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); - continue; - } - assert( pElem->elem.common.type < OPAL_DATATYPE_MAX_PREDEFINED ); - if( 0 != iov_len_local ) { - unsigned char* temp = conv_ptr; - /* We have some partial data here. Let's copy it into the convertor - * and keep it hot until the next round. - */ - assert( iov_len_local < opal_datatype_basicDatatypes[pElem->elem.common.type]->size ); - COMPUTE_CSUM( iov_ptr, iov_len_local, pConvertor ); - - opal_unpack_partial_datatype( pConvertor, pElem, - iov_ptr, 0, iov_len_local, - &temp ); - - pConvertor->partial_length = iov_len_local; - iov_len_local = 0; } - goto complete_loop; + } + } + + while( 1 ) { + while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { + /* we have a basic datatype (working on full blocks) */ + UNPACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc, + iov_ptr, conv_ptr, iov_len_local ); + if( 0 != count_desc ) /* completed? 
*/ + goto complete_loop; + conv_ptr = pConvertor->pBaseBuf + pStack->disp; + pos_desc++; /* advance to the next data */ + UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); } if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ DO_DEBUG( opal_output( 0, "unpack end_loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %ld space %lu\n", @@ -337,11 +335,9 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor, pStack->disp, (unsigned long)iov_len_local ); ); if( --(pStack->count) == 0 ) { /* end of loop */ if( 0 == pConvertor->stack_pos ) { - /* Do the same thing as when the loop is completed */ - iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */ - total_unpacked += iov[iov_count].iov_len; - iov_count++; /* go to the next */ - goto complete_conversion; + /* we're done. Force the exit of the main for loop (around iovec) */ + *out_size = iov_count; + goto complete_loop; } pConvertor->stack_pos--; pStack--; @@ -380,14 +376,29 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor, conv_ptr = pConvertor->pBaseBuf + pStack->disp; UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" ); - continue; } } complete_loop: + assert( pElem->elem.common.type < OPAL_DATATYPE_MAX_PREDEFINED ); + if( 0 != iov_len_local ) { + unsigned char* temp = conv_ptr; + /* We have some partial data here. Let's copy it into the convertor + * and keep it hot until the next round. 
+ */ + assert( iov_len_local < opal_datatype_basicDatatypes[pElem->elem.common.type]->size ); + COMPUTE_CSUM( iov_ptr, iov_len_local, pConvertor ); + + opal_unpack_partial_datatype( pConvertor, pElem, + iov_ptr, 0, iov_len_local, + &temp ); + + pConvertor->partial_length = iov_len_local; + iov_len_local = 0; + } + iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */ total_unpacked += iov[iov_count].iov_len; } - complete_conversion: *max_data = total_unpacked; pConvertor->bConverted += total_unpacked; /* update the already converted bytes */ *out_size = iov_count; @@ -514,11 +525,9 @@ opal_unpack_general_function( opal_convertor_t* pConvertor, pStack->disp, (unsigned long)iov_len_local ); ); if( --(pStack->count) == 0 ) { /* end of loop */ if( 0 == pConvertor->stack_pos ) { - /* Do the same thing as when the loop is completed */ - iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */ - total_unpacked += iov[iov_count].iov_len; - iov_count++; /* go to the next */ - goto complete_conversion; + /* we're done. Force the exit of the main for loop (around iovec) */ + *out_size = iov_count; + goto complete_loop; } pConvertor->stack_pos--; pStack--; @@ -552,7 +561,6 @@ opal_unpack_general_function( opal_convertor_t* pConvertor, iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */ total_unpacked += iov[iov_count].iov_len; } - complete_conversion: *max_data = total_unpacked; pConvertor->bConverted += total_unpacked; /* update the already converted bytes */ *out_size = iov_count; diff --git a/opal/datatype/opal_datatype_unpack.h b/opal/datatype/opal_datatype_unpack.h index 49a418ba2b3..db5b58fd3c3 100644 --- a/opal/datatype/opal_datatype_unpack.h +++ b/opal/datatype/opal_datatype_unpack.h @@ -26,6 +26,60 @@ CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) ) #endif +/** + * This function deals only with partial elements. 
The COUNT points however to the whole leftover count, + * but this function is only expected to operate on an amount less than blength, that would allow the rest + * of the pack process to handle only entire blength blocks (plus the left over). + * + * Return 1 if we are now aligned on a block, 0 otherwise. + */ +static inline int +unpack_partial_blocklen( opal_convertor_t* CONVERTOR, + const dt_elem_desc_t* ELEM, + size_t* COUNT, + unsigned char** packed, + unsigned char** memory, + size_t* SPACE ) +{ + const ddt_elem_desc_t* _elem = &((ELEM)->elem); + size_t do_now_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size; + size_t do_now = (*COUNT); + unsigned char* _memory = (*memory) + _elem->disp; + unsigned char* _packed = *packed; + + assert( *(COUNT) <= (_elem->count * _elem->blocklen)); + + /** + * First check if we already did something on this element ? The COUNT is the number + * of remaining predefined types in the current elem, not how many predefined types + * should be manipulated in the current call (this number is instead reflected on the + * SPACE). 
+ */ + if( 0 == (do_now = (*COUNT) % _elem->blocklen) ) + return 1; + + size_t left_in_block = do_now; /* left in the current blocklen */ + + if( (do_now_bytes * do_now) > *(SPACE) ) + do_now = (*SPACE) / do_now_bytes; + + do_now_bytes *= do_now; + + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "unpack memcpy( %p, %p, %lu ) => space %lu [prolog]\n", + (void*)_memory, (void*)_packed, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); + MEMCPY_CSUM( _memory, _packed, do_now_bytes, (CONVERTOR) ); + *(memory) += (ptrdiff_t)do_now_bytes; + if( do_now == left_in_block ) /* compensate if completed a blocklen */ + *(memory) += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size); + + *(COUNT) -= do_now; + *(SPACE) -= do_now_bytes; + *(packed) += do_now_bytes; + return (do_now == left_in_block); +} + static inline void unpack_predefined_data( opal_convertor_t* CONVERTOR, const dt_elem_desc_t* ELEM, @@ -36,27 +90,24 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR, { const ddt_elem_desc_t* _elem = &((ELEM)->elem); size_t blocklen_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size; - size_t cando_count = (*COUNT), do_now, do_now_bytes; + size_t cando_count = (*COUNT), do_now_bytes; unsigned char* _memory = (*memory) + _elem->disp; unsigned char* _packed = *packed; + assert( 0 == (cando_count % _elem->blocklen) ); /* no partials here */ assert( *(COUNT) <= (_elem->count * _elem->blocklen)); if( (blocklen_bytes * cando_count) > *(SPACE) ) cando_count = (*SPACE) / blocklen_bytes; - do_now = *(COUNT); /* save the COUNT for later */ /* premptively update the number of COUNT we will return. 
*/ *(COUNT) -= cando_count; - - if( 1 == _elem->count ) { /* Everything is contiguous, handle it as a prologue */ - goto do_epilog; - } + if( 1 == _elem->blocklen ) { /* Do as many full blocklen as possible */ for(; cando_count > 0; cando_count--) { OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "unpack 2. memcpy( %p, %p, %lu ) => space %lu\n", + DO_DEBUG( opal_output( 0, "unpack memcpy( %p, %p, %lu ) => space %lu [blen = 1]\n", (void*)_memory, (void*)_packed, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); MEMCPY_CSUM( _memory, _packed, blocklen_bytes, (CONVERTOR) ); _packed += blocklen_bytes; @@ -65,57 +116,27 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR, goto update_and_return; } - blocklen_bytes *= _elem->blocklen; - if( (_elem->count * _elem->blocklen) == cando_count ) { - goto skip_prolog; - } - - /** - * First check if we already did something on this element ? The COUNT is the number - * of remaining predefined types in the current elem, not how many predefined types - * should be manipulated in the current call (this number is instead reflected on the - * SPACE). - */ - do_now = do_now % _elem->blocklen; /* any partial elements ? */ - - if( 0 != do_now ) { - size_t left_in_block = do_now; /* left in the current blocklen */ - do_now = (do_now > cando_count ) ? cando_count : do_now; - do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size; - - OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "unpack 1. 
memcpy( %p, %p, %lu ) => space %lu [prolog]\n", - (void*)_memory, (void*)_packed, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); ); - MEMCPY_CSUM( _memory, _packed, do_now_bytes, (CONVERTOR) ); - _memory += (ptrdiff_t)do_now_bytes; - /* compensate if we just completed a blocklen */ - if( do_now == left_in_block ) - _memory += _elem->extent - blocklen_bytes; - _packed += do_now_bytes; - cando_count -= do_now; - } + if( (1 < _elem->count) && (_elem->blocklen <= cando_count) ) { + blocklen_bytes *= _elem->blocklen; - skip_prolog: - /* Do as many full blocklen as possible */ - for(size_t _i = 0; _elem->blocklen <= cando_count; _i++ ) { - OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, - (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "unpack 2. memcpy( %p, %p, %lu ) => space %lu\n", - (void*)_memory, (void*)_packed, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); - MEMCPY_CSUM( _memory, _packed, blocklen_bytes, (CONVERTOR) ); - _packed += blocklen_bytes; - _memory += _elem->extent; - cando_count -= _elem->blocklen; + do { /* Do as many full blocklen as possible */ + OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf, + (CONVERTOR)->pDesc, (CONVERTOR)->count ); + DO_DEBUG( opal_output( 0, "unpack 2. memcpy( %p, %p, %lu ) => space %lu\n", + (void*)_memory, (void*)_packed, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); ); + MEMCPY_CSUM( _memory, _packed, blocklen_bytes, (CONVERTOR) ); + _packed += blocklen_bytes; + _memory += _elem->extent; + cando_count -= _elem->blocklen; + } while (_elem->blocklen <= cando_count); } /** * As an epilog do anything left from the last blocklen. 
*/ if( 0 != cando_count ) { - - do_epilog: - assert( cando_count < _elem->blocklen ); + assert( (cando_count < _elem->blocklen) || + ((1 == _elem->count) && (cando_count <= _elem->blocklen)) ); do_now_bytes = cando_count * opal_datatype_basicDatatypes[_elem->common.type]->size; OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); @@ -160,8 +181,21 @@ static inline void unpack_contiguous_loop( opal_convertor_t* CONVERTOR, *(COUNT) -= _copy_loops; } -#define UNPACK_PREDEFINED_DATATYPE( CONVERTOR, ELEM, COUNT, PACKED, MEMORY, SPACE ) \ - unpack_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(PACKED), &(MEMORY), &(SPACE) ) +#define UNPACK_PARTIAL_BLOCKLEN( CONVERTOR, /* the convertor */ \ + ELEM, /* the basic element to be packed */ \ + COUNT, /* the number of elements */ \ + PACKED, /* the destination pointer (char*) */ \ + MEMORY, /* the source pointer (char*) */ \ + SPACE ) /* the space in the destination buffer */ \ +unpack_partial_blocklen( (CONVERTOR), (ELEM), &(COUNT), &(PACKED), &(MEMORY), &(SPACE) ) + +#define UNPACK_PREDEFINED_DATATYPE( CONVERTOR, /* the convertor */ \ + ELEM, /* the basic element to be packed */ \ + COUNT, /* the number of elements */ \ + PACKED, /* the destination pointer (char*) */ \ + MEMORY, /* the source pointer (char*) */ \ + SPACE ) /* the space in the destination buffer */ \ +unpack_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(PACKED), &(MEMORY), &(SPACE) ) #define UNPACK_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, PACKED, MEMORY, SPACE ) \ unpack_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(PACKED), &(MEMORY), &(SPACE) ) From 9898332ae0bc7e61ce88353ec4d08faf7a98bcd3 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 6 Aug 2019 07:48:58 -0700 Subject: [PATCH 58/94] Allow individual jobs to set their map/rank/bind policies Override the defaults when provided. Ignore LSF binding file if user overrides by specifying a policy. 
Fixes #6631 Signed-off-by: Ralph Castain (cherry picked from commit ea0dfc321809db50f78e742da1d22f9ef59650a3) --- orte/mca/ras/lsf/ras_lsf_module.c | 16 ++++++++++++++-- orte/mca/rmaps/base/rmaps_base_frame.c | 1 + orte/orted/pmix/pmix_server_dyn.c | 21 --------------------- 3 files changed, 15 insertions(+), 23 deletions(-) diff --git a/orte/mca/ras/lsf/ras_lsf_module.c b/orte/mca/ras/lsf/ras_lsf_module.c index becec82f213..43b3c7e7571 100644 --- a/orte/mca/ras/lsf/ras_lsf_module.c +++ b/orte/mca/ras/lsf/ras_lsf_module.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2014 Intel, Inc. All rights reserved + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ * @@ -70,6 +70,7 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) char *affinity_file; struct stat buf; char *ptr; + bool directives_given = false; /* get the list of allocated nodes */ if ((num_nodes = lsb_getalloc(&nodelist)) < 0) { @@ -112,8 +113,19 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) /* release the nodelist from lsf */ opal_argv_free(nodelist); + /* check to see if any mapping or binding directives were given */ + if (NULL != jdata && NULL != jdata->map) { + if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) || + OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { + directives_given = true; + } + } else if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) || + OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) { + directives_given = true; + } + /* check for an affinity file */ - if (NULL != (affinity_file = getenv("LSB_AFFINITY_HOSTFILE"))) { + if (!directives_given && NULL != (affinity_file = getenv("LSB_AFFINITY_HOSTFILE"))) { /* check to see if the file is empty - if it is, * then affinity 
wasn't actually set for this job */ if (0 != stat(affinity_file, &buf)) { diff --git a/orte/mca/rmaps/base/rmaps_base_frame.c b/orte/mca/rmaps/base/rmaps_base_frame.c index befb9fd5fe5..bca9a6d8693 100644 --- a/orte/mca/rmaps/base/rmaps_base_frame.c +++ b/orte/mca/rmaps/base/rmaps_base_frame.c @@ -556,6 +556,7 @@ static int check_modifiers(char *ck, orte_mapping_policy_t *tmp) for (i=0; NULL != ck2[i]; i++) { if (0 == strncasecmp(ck2[i], "span", strlen(ck2[i]))) { ORTE_SET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_SPAN); + ORTE_SET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_GIVEN); found = true; } else if (0 == strncasecmp(ck2[i], "pe", strlen("pe"))) { /* break this at the = sign to get the number */ diff --git a/orte/orted/pmix/pmix_server_dyn.c b/orte/orted/pmix/pmix_server_dyn.c index 4a8bf3ee8d2..c3ab28f7ae8 100644 --- a/orte/orted/pmix/pmix_server_dyn.c +++ b/orte/orted/pmix/pmix_server_dyn.c @@ -302,13 +302,6 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, /*** MAP-BY ***/ } else if (0 == strcmp(info->key, OPAL_PMIX_MAPBY)) { - if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { - /* not allowed to provide multiple mapping policies */ - orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", - true, "mapping", info->data.string, - orte_rmaps_base_print_mapping(orte_rmaps_base.mapping)); - return ORTE_ERR_BAD_PARAM; - } rc = orte_rmaps_base_set_mapping_policy(jdata, &jdata->map->mapping, NULL, info->data.string); if (ORTE_SUCCESS != rc) { @@ -317,13 +310,6 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, /*** RANK-BY ***/ } else if (0 == strcmp(info->key, OPAL_PMIX_RANKBY)) { - if (ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) { - /* not allowed to provide multiple ranking policies */ - orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", - true, "ranking", info->data.string, - orte_rmaps_base_print_ranking(orte_rmaps_base.ranking)); - return ORTE_ERR_BAD_PARAM; - } rc = 
orte_rmaps_base_set_ranking_policy(&jdata->map->ranking, jdata->map->mapping, info->data.string); @@ -333,13 +319,6 @@ int pmix_server_spawn_fn(opal_process_name_t *requestor, /*** BIND-TO ***/ } else if (0 == strcmp(info->key, OPAL_PMIX_BINDTO)) { - if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { - /* not allowed to provide multiple mapping policies */ - orte_show_help("help-opal-hwloc-base.txt", "redefining-policy", true, - info->data.string, - opal_hwloc_base_print_binding(opal_hwloc_binding_policy)); - return ORTE_ERR_BAD_PARAM; - } rc = opal_hwloc_base_set_binding_policy(&jdata->map->binding, info->data.string); if (ORTE_SUCCESS != rc) { From f0f25b60a8c4b0341f5e5dc8400585c37288ec24 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 7 Aug 2019 05:47:12 -0700 Subject: [PATCH 59/94] Fix typos Provide a missing header and paren Thanks to @zerothi for the assistance Signed-off-by: Ralph Castain (cherry picked from commit bd5a1765eea200651babc5bfd9f45a9f3cedefbc) --- orte/mca/ras/lsf/ras_lsf_module.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/orte/mca/ras/lsf/ras_lsf_module.c b/orte/mca/ras/lsf/ras_lsf_module.c index 43b3c7e7571..6dd3b68be5f 100644 --- a/orte/mca/ras/lsf/ras_lsf_module.c +++ b/orte/mca/ras/lsf/ras_lsf_module.c @@ -36,6 +36,7 @@ #include "orte/mca/rmaps/rmaps_types.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rmaps/base/base.h" #include "orte/runtime/orte_globals.h" #include "orte/util/show_help.h" @@ -120,7 +121,7 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) directives_given = true; } } else if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) || - OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) { + OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) { directives_given = true; } From 2fa112c0a6163c2689976d1f831efbd417fffeb5 Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Mon, 5 Aug 2019 18:05:01 +0300 Subject: [PATCH 60/94] UCX: added PPN hint 
for UCX context - added PPN hint for UCX context init Signed-off-by: Sergey Oblomov (cherry picked from commit 43186e494b47ca29e8d5e7a864b6b98b8e873195) Conflicts: opal/mca/common/ucx/common_ucx_wpool.c --- config/ompi_check_ucx.m4 | 3 ++- ompi/mca/pml/ucx/pml_ucx.c | 29 ++++++++++++++---------- oshmem/mca/spml/ucx/spml_ucx_component.c | 13 +++++++++-- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/config/ompi_check_ucx.m4 b/config/ompi_check_ucx.m4 index 42e53f9ce80..7f04ba3a52c 100644 --- a/config/ompi_check_ucx.m4 +++ b/config/ompi_check_ucx.m4 @@ -120,7 +120,8 @@ AC_DEFUN([OMPI_CHECK_UCX],[ UCP_ATOMIC_POST_OP_XOR, UCP_ATOMIC_FETCH_OP_FAND, UCP_ATOMIC_FETCH_OP_FOR, - UCP_ATOMIC_FETCH_OP_FXOR], + UCP_ATOMIC_FETCH_OP_FXOR, + UCP_PARAM_FIELD_ESTIMATED_NUM_PPN], [], [], [#include ]) AC_CHECK_DECLS([UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS], diff --git a/ompi/mca/pml/ucx/pml_ucx.c b/ompi/mca/pml/ucx/pml_ucx.c index ffb7d618343..fb7b7f84615 100644 --- a/ompi/mca/pml/ucx/pml_ucx.c +++ b/ompi/mca/pml/ucx/pml_ucx.c @@ -197,22 +197,27 @@ int mca_pml_ucx_open(void) } /* Initialize UCX context */ - params.field_mask = UCP_PARAM_FIELD_FEATURES | - UCP_PARAM_FIELD_REQUEST_SIZE | - UCP_PARAM_FIELD_REQUEST_INIT | - UCP_PARAM_FIELD_REQUEST_CLEANUP | - UCP_PARAM_FIELD_TAG_SENDER_MASK | - UCP_PARAM_FIELD_MT_WORKERS_SHARED | - UCP_PARAM_FIELD_ESTIMATED_NUM_EPS; - params.features = UCP_FEATURE_TAG; - params.request_size = sizeof(ompi_request_t); - params.request_init = mca_pml_ucx_request_init; - params.request_cleanup = mca_pml_ucx_request_cleanup; - params.tag_sender_mask = PML_UCX_SPECIFIC_SOURCE_MASK; + params.field_mask = UCP_PARAM_FIELD_FEATURES | + UCP_PARAM_FIELD_REQUEST_SIZE | + UCP_PARAM_FIELD_REQUEST_INIT | + UCP_PARAM_FIELD_REQUEST_CLEANUP | + UCP_PARAM_FIELD_TAG_SENDER_MASK | + UCP_PARAM_FIELD_MT_WORKERS_SHARED | + UCP_PARAM_FIELD_ESTIMATED_NUM_EPS; + params.features = UCP_FEATURE_TAG; + params.request_size = sizeof(ompi_request_t); + params.request_init = 
mca_pml_ucx_request_init; + params.request_cleanup = mca_pml_ucx_request_cleanup; + params.tag_sender_mask = PML_UCX_SPECIFIC_SOURCE_MASK; params.mt_workers_shared = 0; /* we do not need mt support for context since it will be protected by worker */ params.estimated_num_eps = ompi_proc_world_size(); +#if HAVE_DECL_UCP_PARAM_FIELD_ESTIMATED_NUM_PPN + params.estimated_num_ppn = opal_process_info.num_local_peers + 1; + params.field_mask |= UCP_PARAM_FIELD_ESTIMATED_NUM_PPN; +#endif + status = ucp_init(&params, config, &ompi_pml_ucx.ucp_context); ucp_config_release(config); diff --git a/oshmem/mca/spml/ucx/spml_ucx_component.c b/oshmem/mca/spml/ucx/spml_ucx_component.c index 0f0ce2a15a3..3d29bd4e5d8 100644 --- a/oshmem/mca/spml/ucx/spml_ucx_component.c +++ b/oshmem/mca/spml/ucx/spml_ucx_component.c @@ -212,8 +212,12 @@ static int spml_ucx_init(void) opal_common_ucx_mca_register(); memset(&params, 0, sizeof(params)); - params.field_mask = UCP_PARAM_FIELD_FEATURES|UCP_PARAM_FIELD_ESTIMATED_NUM_EPS|UCP_PARAM_FIELD_MT_WORKERS_SHARED; - params.features = UCP_FEATURE_RMA|UCP_FEATURE_AMO32|UCP_FEATURE_AMO64; + params.field_mask = UCP_PARAM_FIELD_FEATURES | + UCP_PARAM_FIELD_ESTIMATED_NUM_EPS | + UCP_PARAM_FIELD_MT_WORKERS_SHARED; + params.features = UCP_FEATURE_RMA | + UCP_FEATURE_AMO32 | + UCP_FEATURE_AMO64; params.estimated_num_eps = ompi_proc_world_size(); if (oshmem_mpi_thread_requested == SHMEM_THREAD_MULTIPLE) { params.mt_workers_shared = 1; @@ -221,6 +225,11 @@ static int spml_ucx_init(void) params.mt_workers_shared = 0; } +#if HAVE_DECL_UCP_PARAM_FIELD_ESTIMATED_NUM_PPN + params.estimated_num_ppn = opal_process_info.num_local_peers + 1; + params.field_mask |= UCP_PARAM_FIELD_ESTIMATED_NUM_PPN; +#endif + err = ucp_init(&params, ucp_config, &mca_spml_ucx.ucp_context); ucp_config_release(ucp_config); if (UCS_OK != err) { From 167ca31a311f6542a06011d25dba60e10d4114b2 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 9 Aug 2019 13:13:45 -0700 Subject: [PATCH 61/94] Update PMIx
to official v3.1.4 release Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix3x/pmix/NEWS | 61 + opal/mca/pmix/pmix3x/pmix/VERSION | 12 +- .../pmix3x/pmix/config/c_get_alignment.m4 | 8 +- opal/mca/pmix/pmix3x/pmix/config/pmix.m4 | 16 +- .../config/pmix_check_compiler_version.m4 | 7 +- .../pmix/pmix3x/pmix/config/pmix_check_icc.m4 | 7 +- .../pmix3x/pmix/config/pmix_setup_libev.m4 | 96 ++ .../pmix3x/pmix/config/pmix_setup_libevent.m4 | 197 +-- opal/mca/pmix/pmix3x/pmix/configure.ac | 1 - opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec | 2 +- opal/mca/pmix/pmix3x/pmix/examples/client.c | 3 + opal/mca/pmix/pmix3x/pmix/examples/dynamic.c | 82 +- opal/mca/pmix/pmix3x/pmix/src/client/pmi1.c | 20 +- .../pmix/pmix3x/pmix/src/client/pmix_client.c | 4 +- .../pmix/src/client/pmix_client_fence.c | 18 +- .../pmix3x/pmix/src/client/pmix_client_get.c | 95 +- .../pmix/pmix3x/pmix/src/common/pmix_log.c | 14 +- .../pmix/pmix3x/pmix/src/common/pmix_query.c | 44 +- .../pmix/src/event/pmix_event_notification.c | 2 +- .../pmix3x/pmix/src/include/pmix_globals.c | 31 + .../pmix3x/pmix/src/include/pmix_globals.h | 13 + opal/mca/pmix/pmix3x/pmix/src/include/types.h | 60 +- .../mca/base/pmix_mca_base_component_find.c | 6 +- .../pmix/src/mca/base/pmix_mca_base_open.c | 6 +- .../pmix3x/pmix/src/mca/bfrops/v20/copy.c | 26 +- .../src/mca/common/dstore/dstore_segment.c | 9 +- .../pmix3x/pmix/src/mca/gds/hash/gds_hash.c | 1181 +++++++++++++++-- .../pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c | 4 +- .../src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c | 4 +- .../src/mca/pif/linux_ipv6/pif_linux_ipv6.c | 17 +- opal/mca/pmix/pmix3x/pmix/src/mca/pif/pif.h | 4 +- .../mca/pif/solaris_ipv6/pif_solaris_ipv6.c | 4 +- .../pmix/src/mca/plog/base/plog_base_stubs.c | 12 +- .../pmix/src/mca/plog/stdfd/plog_stdfd.c | 6 +- .../pmix/src/mca/pnet/base/pnet_base_fns.c | 14 +- .../pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c | 4 +- .../pmix3x/pmix/src/mca/pnet/tcp/pnet_tcp.c | 11 +- .../pmix/src/mca/pshmem/mmap/pshmem_mmap.c | 6 +- 
.../pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c | 6 +- .../pmix/src/mca/ptl/tcp/ptl_tcp_component.c | 28 +- .../pmix3x/pmix/src/mca/ptl/usock/ptl_usock.c | 4 +- .../src/mca/ptl/usock/ptl_usock_component.c | 2 +- .../pmix3x/pmix/src/runtime/pmix_finalize.c | 8 +- .../pmix/pmix3x/pmix/src/runtime/pmix_init.c | 10 +- .../pmix/src/runtime/pmix_progress_threads.c | 148 ++- .../pmix/pmix3x/pmix/src/server/pmix_server.c | 35 +- .../pmix3x/pmix/src/server/pmix_server_get.c | 83 +- .../pmix3x/pmix/src/server/pmix_server_ops.c | 32 +- .../mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c | 9 +- opal/mca/pmix/pmix3x/pmix/src/util/compress.c | 10 +- opal/mca/pmix/pmix3x/pmix/src/util/name_fns.c | 46 +- opal/mca/pmix/pmix3x/pmix/src/util/output.c | 6 +- opal/mca/pmix/pmix3x/pmix/src/util/pif.h | 6 +- .../pmix/pmix3x/pmix/src/util/pmix_environ.c | 78 +- opal/mca/pmix/pmix3x/pmix/test/cli_stages.c | 16 +- opal/mca/pmix/pmix3x/pmix/test/cli_stages.h | 4 +- opal/mca/pmix/pmix3x/pmix/test/pmi_client.c | 59 +- .../pmix/pmix3x/pmix/test/simple/Makefile.am | 4 +- .../mca/pmix/pmix3x/pmix/test/simple/gwtest.c | 10 +- .../pmix3x/pmix/test/simple/quietclient.c | 117 +- .../pmix/pmix3x/pmix/test/simple/simpclient.c | 126 +- .../pmix/pmix3x/pmix/test/simple/simpdie.c | 11 +- .../pmix/pmix3x/pmix/test/simple/simpdmodex.c | 78 +- .../pmix/pmix3x/pmix/test/simple/simpdyn.c | 37 +- .../mca/pmix/pmix3x/pmix/test/simple/simpft.c | 11 +- .../pmix/pmix3x/pmix/test/simple/simpjctrl.c | 13 +- .../pmix/pmix3x/pmix/test/simple/simplegacy.c | 248 +--- .../pmix/pmix3x/pmix/test/simple/simppub.c | 11 +- .../pmix/pmix3x/pmix/test/simple/simptest.c | 266 +++- .../pmix3x/pmix/test/simple/simptimeout.c | 10 +- .../pmix/pmix3x/pmix/test/simple/stability.c | 337 ++++- opal/mca/pmix/pmix3x/pmix/test/test_common.h | 6 +- opal/mca/pmix/pmix3x/pmix/test/test_error.c | 6 +- opal/mca/pmix/pmix3x/pmix/test/test_server.c | 36 +- 74 files changed, 2970 insertions(+), 1054 deletions(-) create mode 100644 
opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libev.m4 diff --git a/opal/mca/pmix/pmix3x/pmix/NEWS b/opal/mca/pmix/pmix3x/pmix/NEWS index 6445b18275d..f18016dd7f2 100644 --- a/opal/mca/pmix/pmix3x/pmix/NEWS +++ b/opal/mca/pmix/pmix3x/pmix/NEWS @@ -21,6 +21,28 @@ example, a bug might be fixed in the master, and then moved to multiple release branches. +3.1.4 -- 9 Aug 2019 +---------------------- +- PR #1342: Fix if_linux_ipv6_open interface filter +- PR #1344: Remove unnecessary libtool init for c++ +- PR #1346: Fix incorrect pointer casts/deref +- PR #1347/#1348: Fix use of gethostname +- PR #1353/#1357: util/environ: use setenv() if available +- PR #1354: Plug a misc memory leak in the pmix_query_caddy_t destructor +- PR #1356: Fix another pointer cast/deref in test suite +- PR #1358: Implement support for class-based info arrays +- PR #1359: Plug misc minor memory leaks +- PR #1368: Backport support for libev +- PR #1369: Fix legacy support for PMI-1 +- PR #1370: Cleanup handling of data requests for different nspaces +- PR #1193: Resolve get of proc-specific job-level info from another nspace +- PR #1376: Fix problems in the Log code path, updates to simple test suite +- PR #1377: Skip fastpath/dstore for NULL keys +- PR #1379: Change IF_NAMESIZE to PMIX_IF_NAMESIZE and set to safe size +- PR #1385: Check for EINVAL return from posix_fallocate +- PR #1389: Plug misc memory leaks in configure + + 3.1.3 -- 2 July 2019 ---------------------- - PR #1096: Restore PMIX_NUM_SLOTS for backward compatibility @@ -120,6 +142,45 @@ multiple release branches. - Fix a bug when registering default event handlers +3.1.0 -- 17 Jan 2019 +---------------------- +**** THIS RELEASE MARKS THE STARTING POINT FOR FULL COMPLIANCE +**** WITH THE PMIX v3 STANDARD. ALL API BEHAVIORS AND ATTRIBUTE +**** DEFINITIONS MEET THE v3 STANDARD SPECIFICATIONS. + - Add a new, faster dstore GDS component 'ds21' + - Performance optimizations for the dstore GDS components. 
+ - Plug miscellaneous memory leaks + - Silence an unnecessary warning message when checking connection + to a non-supporting server + - Ensure lost-connection events get delivered to default event + handlers + - Correctly handle cache refresh for queries + - Protect against race conditions between host and internal library + when dealing with async requests + - Cleanup tool operations and add support for connections to + remote servers. Initial support for debugger direct/indirect + launch verified with PRRTE. Cleanup setting of tmpdir options. + Drop rendezvous files when acting as a launcher + - Automatically store the server URI for easy access by client + - Provide MCA parameter to control TCP connect retry/timeout + - Update event notification system to properly evict oldest events + when more space is needed + - Fix a number of error paths + - Update IOF cache code to properly drop oldest message. Provide + MCA parameter for setting cache size. + - Handle setsockopt(SO_RCVTIMEO) not being supported + - Ensure that epilogs get run even when connections unexpectedly + terminate. Properly split epilog strings to process multiple + paths + - Pass the tool's command line to the server so it can be returned + in queries + - Add support for C11 atomics + - Support collection and forwarding of fabric-specific envars + - Improve handling of hwloc configure option + - Fix PMIx_server_generate_regex to preserve node ordering + - Fix a bug when registering default event handlers + + 3.0.2 -- 18 Sept 2018 ---------------------- - Ensure we cleanup any active sensors when a peer departs. Allow the diff --git a/opal/mca/pmix/pmix3x/pmix/VERSION b/opal/mca/pmix/pmix3x/pmix/VERSION index 13511b92a55..8d2b40af3ea 100644 --- a/opal/mca/pmix/pmix3x/pmix/VERSION +++ b/opal/mca/pmix/pmix3x/pmix/VERSION @@ -15,7 +15,7 @@ major=3 minor=1 -release=3 +release=4 # greek is used for alpha or beta release tags. If it is non-empty, # it will be appended to the version number. 
It does not have to be @@ -30,7 +30,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=gitc10fd1d4 +repo_rev=gite6837057 # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Jul 02, 2019" +date="Aug 09, 2019" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library @@ -75,8 +75,8 @@ date="Jul 02, 2019" # Version numbers are described in the Libtool current:revision:age # format. -libpmix_so_version=4:23:2 -libpmi_so_version=1:0:0 +libpmix_so_version=4:24:2 +libpmi_so_version=1:1:0 libpmi2_so_version=1:0:0 # "Common" components install standalone libraries that are run-time @@ -84,4 +84,4 @@ libpmi2_so_version=1:0:0 # # well. Yuck; this somewhat breaks the # # components-don't-affect-the-build-system abstraction. # -libmca_common_dstore_so_version=1:0:0 +libmca_common_dstore_so_version=1:1:0 diff --git a/opal/mca/pmix/pmix3x/pmix/config/c_get_alignment.m4 b/opal/mca/pmix/pmix3x/pmix/config/c_get_alignment.m4 index db379100994..6596c0ae88d 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/c_get_alignment.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/c_get_alignment.m4 @@ -11,9 +11,9 @@ dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. -dnl Copyright (c) 2014-2015 Intel, Inc. All rights reserved. -dnl Copyright (c) 2015 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2014-2019 Intel, Inc. All rights reserved. +dnl Copyright (c) 2015-2019 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. 
dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -44,7 +44,9 @@ AC_DEFUN([PMIX_C_GET_ALIGNMENT],[ FILE *f=fopen("conftestval", "w"); if (!f) exit(1); diff = ((char *)&p->x) - ((char *)&p->c); + free(p); fprintf(f, "%d\n", (diff >= 0) ? diff : -diff); + fclose(f); ]])], [AS_TR_SH([pmix_cv_c_align_$1])=`cat conftestval`], [AC_MSG_WARN([*** Problem running configure test!]) AC_MSG_WARN([*** See config.log for details.]) diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 index e0585256c49..1d37089f8ae 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix.m4 @@ -663,7 +663,7 @@ AC_DEFUN([PMIX_SETUP_CORE],[ # -lrt might be needed for clock_gettime PMIX_SEARCH_LIBS_CORE([clock_gettime], [rt]) - AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep statfs statvfs getpeereid getpeerucred strnlen posix_fallocate tcgetpgrp]) + AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep statfs statvfs getpeereid getpeerucred strnlen posix_fallocate tcgetpgrp setpgid ptsname openpty setenv]) # On some hosts, htonl is a define, so the AC_CHECK_FUNC will get # confused. On others, it's in the standard library, but stubbed with @@ -754,8 +754,22 @@ AC_DEFUN([PMIX_SETUP_CORE],[ ################################## pmix_show_title "Libevent" + PMIX_LIBEV_CONFIG PMIX_LIBEVENT_CONFIG + AS_IF([test $pmix_libevent_support -eq 1 && test $pmix_libev_support -eq 1], + [AC_MSG_WARN([Both libevent and libev support have been specified.]) + AC_MSG_WARN([Only one can be configured against at a time. Please]) + AC_MSG_WARN([remove one from the configure command line.]) + AC_MSG_ERROR([Cannot continue])]) + + AS_IF([test $pmix_libevent_support -eq 0 && test $pmix_libev_support -eq 0], + [AC_MSG_WARN([Either libevent or libev support is required, but neither]) + AC_MSG_WARN([was found. 
Please use the configure options to point us]) + AC_MSG_WARN([to where we can find one or the other library]) + AC_MSG_ERROR([Cannot continue])]) + + ################################## # HWLOC ################################## diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_check_compiler_version.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_check_compiler_version.m4 index eca2013be82..da822b04810 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_check_compiler_version.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_check_compiler_version.m4 @@ -1,7 +1,9 @@ dnl -*- shell-script -*- dnl dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. -dnl Copyright (c) 2013-2017 Intel, Inc. All rights reserved. +dnl Copyright (c) 2013-2019 Intel, Inc. All rights reserved. +dnl Copyright (c) 2019 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. dnl dnl $COPYRIGHT$ dnl @@ -43,6 +45,7 @@ int main (int argc, char * argv[]) f=fopen("conftestval", "w"); if (!f) exit(1); fprintf (f, "%d", PLATFORM_COMPILER_$1); + fclose(f); return 0; } ], [ @@ -75,6 +78,7 @@ int main (int argc, char * argv[]) f=fopen("conftestval", "w"); if (!f) exit(1); fprintf (f, "%s", PLATFORM_COMPILER_$1); + fclose(f); return 0; } ], [ @@ -110,6 +114,7 @@ int main (int argc, char * argv[]) f=fopen("conftestval", "w"); if (!f) exit(1); fprintf (f, "%s", _STRINGIFY(PLATFORM_COMPILER_$1)); + fclose(f); return 0; } ], [ diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_check_icc.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_check_icc.m4 index e8a06b25148..05ce9431bd3 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_check_icc.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_check_icc.m4 @@ -10,9 +10,9 @@ dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2005 The Regents of the University of California. dnl All rights reserved. -dnl Copyright (c) 2014 Intel, Inc. 
All rights reserved. -dnl Copyright (c) 2016 Research Organization for Information Science -dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2014-2019 Intel, Inc. All rights reserved. +dnl Copyright (c) 2016-2019 Research Organization for Information Science +dnl and Technology (RIST). All rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -45,6 +45,7 @@ int main () func (4711, "Help %d [%s]\n", 10, "ten"); f=fopen ("conftestval", "w"); if (!f) exit (1); + fclose(f); return 0; } diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libev.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libev.m4 new file mode 100644 index 00000000000..494cc2a2c39 --- /dev/null +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libev.m4 @@ -0,0 +1,96 @@ +# -*- shell-script -*- +# +# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. +# Copyright (c) 2017-2019 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_libev_CONFIG([action-if-found], [action-if-not-found]) +# -------------------------------------------------------------------- +AC_DEFUN([PMIX_LIBEV_CONFIG],[ + PMIX_VAR_SCOPE_PUSH([pmix_libev_dir pmix_libev_libdir pmix_libev_standard_header_location pmix_libev_standard_lib_location]) + + AC_ARG_WITH([libev], + [AC_HELP_STRING([--with-libev=DIR], + [Search for libev headers and libraries in DIR ])]) + PMIX_CHECK_WITHDIR([libev], [$with_libev], [include/event.h]) + + AC_ARG_WITH([libev-libdir], + [AC_HELP_STRING([--with-libev-libdir=DIR], + [Search for libev libraries in DIR ])]) + PMIX_CHECK_WITHDIR([libev-libdir], [$with_livev_libdir], [libev.*]) + + pmix_libev_support=0 + + AS_IF([test -n "$with_libev" && test "$with_libev" != "no"], + [AC_MSG_CHECKING([for libev in]) + pmix_check_libev_save_CPPFLAGS="$CPPFLAGS" + pmix_check_libeve_save_LDFLAGS="$LDFLAGS" + pmix_check_libev_save_LIBS="$LIBS" + if test "$with_libev" != "yes"; then + pmix_libev_dir=$with_libev/include + pmix_libev_standard_header_location=no + pmix_libev_standard_lib_location=no + AS_IF([test -z "$with_libev_libdir" || test "$with_libev_libdir" = "yes"], + [if test -d $with_libev/lib; then + pmix_libev_libdir=$with_libev/lib + elif test -d $with_libev/lib64; then + pmix_libev_libdir=$with_libev/lib64 + else + AC_MSG_RESULT([Could not find $with_libev/lib or $with_libev/lib64]) + AC_MSG_ERROR([Can not continue]) + fi + AC_MSG_RESULT([$pmix_libev_dir and $pmix_libev_libdir])], + [AC_MSG_RESULT([$with_libev_libdir])]) + else + AC_MSG_RESULT([(default search paths)]) + pmix_libev_standard_header_location=yes + pmix_libev_standard_lib_location=yes + fi + AS_IF([test ! 
-z "$with_libev_libdir" && test "$with_libev_libdir" != "yes"], + [pmix_libev_libdir="$with_libev_libdir" + pmix_libev_standard_lib_location=no]) + + PMIX_CHECK_PACKAGE([pmix_libev], + [event.h], + [ev], + [event_base_new], + [], + [$pmix_libev_dir], + [$pmix_libev_libdir], + [pmix_libev_support=1], + [pmix_libev_support=0]) + CPPFLAGS="$pmix_check_libev_save_CPPFLAGS" + LDFLAGS="$pmix_check_libev_save_LDFLAGS" + LIBS="$pmix_check_libev_save_LIBS"]) + + AS_IF([test $pmix_libev_support -eq 1], + [LIBS="$LIBS $pmix_libev_LIBS" + + AS_IF([test "$pmix_libev_standard_header_location" != "yes"], + [CPPFLAGS="$CPPFLAGS $pmix_libev_CPPFLAGS"]) + AS_IF([test "$pmix_libev_standard_lib_location" != "yes"], + [LDFLAGS="$LDFLAGS $pmix_libev_LDFLAGS"])]) + + AC_MSG_CHECKING([will libev support be built]) + if test $pmix_libev_support -eq 1; then + AC_MSG_RESULT([yes]) + PMIX_EVENT_HEADER="" + AC_DEFINE_UNQUOTED([PMIX_EVENT_HEADER], [$PMIX_EVENT_HEADER], + [Location of event.h]) + PMIX_SUMMARY_ADD([[External Packages]],[[libev]],[libev],[$pmix_libev_dir]) + else + AC_MSG_RESULT([no]) + fi + + AC_DEFINE_UNQUOTED([PMIX_HAVE_LIBEV], [$pmix_libev_support], [Whether we are building against libev]) + + PMIX_VAR_SCOPE_POP +])dnl diff --git a/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libevent.m4 b/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libevent.m4 index 949af1cfd58..28e3a412273 100644 --- a/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libevent.m4 +++ b/opal/mca/pmix/pmix3x/pmix/config/pmix_setup_libevent.m4 @@ -3,8 +3,8 @@ # Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. # Copyright (c) 2013-2019 Intel, Inc. All rights reserved. -# Copyright (c) 2017 Research Organization for Information Science -# and Technology (RIST). All rights reserved. +# Copyright (c) 2017-2019 Research Organization for Information Science +# and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -19,20 +19,25 @@ AC_DEFUN([PMIX_LIBEVENT_CONFIG],[ [AC_HELP_STRING([--with-libevent-header=HEADER], [The value that should be included in C files to include event.h])]) + pmix_libevent_support=0 + AS_IF([test "$pmix_mode" = "embedded"], [_PMIX_LIBEVENT_EMBEDDED_MODE], - [_PMIX_LIBEVENT_EXTERNAL]) - - AC_MSG_CHECKING([libevent header]) - AC_DEFINE_UNQUOTED([PMIX_EVENT_HEADER], [$PMIX_EVENT_HEADER], - [Location of event.h]) - AC_MSG_RESULT([$PMIX_EVENT_HEADER]) - AC_MSG_CHECKING([libevent2/thread header]) - AC_DEFINE_UNQUOTED([PMIX_EVENT2_THREAD_HEADER], [$PMIX_EVENT2_THREAD_HEADER], - [Location of event2/thread.h]) - AC_MSG_RESULT([$PMIX_EVENT2_THREAD_HEADER]) - - PMIX_SUMMARY_ADD([[External Packages]],[[Libevent]], [pmix_libevent], [yes ($pmix_libevent_source)]) + [AS_IF([test $pmix_libev_support -eq 0], + [_PMIX_LIBEVENT_EXTERNAL])]) + + if test $pmix_libevent_support -eq 1; then + AC_MSG_CHECKING([libevent header]) + AC_DEFINE_UNQUOTED([PMIX_EVENT_HEADER], [$PMIX_EVENT_HEADER], + [Location of event.h]) + AC_MSG_RESULT([$PMIX_EVENT_HEADER]) + AC_MSG_CHECKING([libevent2/thread header]) + AC_DEFINE_UNQUOTED([PMIX_EVENT2_THREAD_HEADER], [$PMIX_EVENT2_THREAD_HEADER], + [Location of event2/thread.h]) + AC_MSG_RESULT([$PMIX_EVENT2_THREAD_HEADER]) + + PMIX_SUMMARY_ADD([[External Packages]],[[Libevent]], [pmix_libevent], [yes ($pmix_libevent_source)]) + fi ]) AC_DEFUN([_PMIX_LIBEVENT_EMBEDDED_MODE],[ @@ -46,7 +51,8 @@ AC_DEFUN([_PMIX_LIBEVENT_EMBEDDED_MODE],[ PMIX_EVENT2_THREAD_HEADER="$with_libevent_header"]) pmix_libevent_source=embedded - ]) + pmix_libevent_support=1 +]) AC_DEFUN([_PMIX_LIBEVENT_EXTERNAL],[ PMIX_VAR_SCOPE_PUSH([pmix_event_dir pmix_event_libdir pmix_event_defaults]) @@ -55,88 +61,109 @@ AC_DEFUN([_PMIX_LIBEVENT_EXTERNAL],[ [AC_HELP_STRING([--with-libevent=DIR], [Search for libevent headers and libraries in DIR ])]) - # Bozo check - AS_IF([test "$with_libevent" = "no"], - [AC_MSG_WARN([It is not 
possible to configure PMIx --without-libevent]) - AC_MSG_ERROR([Cannot continue])]) - AC_ARG_WITH([libevent-libdir], [AC_HELP_STRING([--with-libevent-libdir=DIR], [Search for libevent libraries in DIR ])]) + pmix_check_libevent_save_CPPFLAGS="$CPPFLAGS" + pmix_check_libevent_save_LDFLAGS="$LDFLAGS" + pmix_check_libevent_save_LIBS="$LIBS" + # get rid of the trailing slash(es) libevent_prefix=$(echo $with_libevent | sed -e 'sX/*$XXg') libeventdir_prefix=$(echo $with_libevent_libdir | sed -e 'sX/*$XXg') - AC_MSG_CHECKING([for libevent in]) - if test ! -z "$libevent_prefix" && test "$libevent_prefix" != "yes"; then - pmix_event_defaults=no - pmix_event_dir=$libevent_prefix - if test -d $libevent_prefix/lib; then - pmix_event_libdir=$libevent_prefix/lib - elif test -d $libevent_prefix/lib64; then - pmix_event_libdir=$libevent_prefix/lib64 - elif test -d $libevent_prefix; then - pmix_event_libdir=$libevent_prefix + if test "$libevent_prefix" != "no"; then + AC_MSG_CHECKING([for libevent in]) + if test ! 
-z "$libevent_prefix" && test "$libevent_prefix" != "yes"; then + pmix_event_defaults=no + pmix_event_dir=$libevent_prefix/include + if test -d $libevent_prefix/lib; then + pmix_event_libdir=$libevent_prefix/lib + elif test -d $libevent_prefix/lib64; then + pmix_event_libdir=$libevent_prefix/lib64 + elif test -d $libevent_prefix; then + pmix_event_libdir=$libevent_prefix + else + AC_MSG_RESULT([Could not find $libevent_prefix/lib, $libevent_prefix/lib64, or $libevent_prefix]) + AC_MSG_ERROR([Can not continue]) + fi + AC_MSG_RESULT([$pmix_event_dir and $pmix_event_libdir]) else - AC_MSG_RESULT([Could not find $libevent_prefix/lib, $libevent_prefix/lib64, or $libevent_prefix]) - AC_MSG_ERROR([Can not continue]) + pmix_event_defaults=yes + pmix_event_dir=/usr/include + if test -d /usr/lib; then + pmix_event_libdir=/usr/lib + AC_MSG_RESULT([(default search paths)]) + elif test -d /usr/lib64; then + pmix_event_libdir=/usr/lib64 + AC_MSG_RESULT([(default search paths)]) + else + AC_MSG_RESULT([default paths not found]) + pmix_libevent_support=0 + fi fi - AC_MSG_RESULT([$pmix_event_dir and $pmix_event_libdir]) - else - pmix_event_defaults=yes - pmix_event_dir=/usr/include - if test -d /usr/lib; then - pmix_event_libdir=/usr/lib - elif test -d /usr/lib64; then - pmix_event_libdir=/usr/lib64 - else - AC_MSG_RESULT([not found]) - AC_MSG_WARN([Could not find /usr/lib or /usr/lib64 - you may]) - AC_MSG_WARN([need to specify --with-libevent-libdir=]) - AC_MSG_ERROR([Can not continue]) + AS_IF([test ! 
-z "$libeventdir_prefix" && "$libeventdir_prefix" != "yes"], + [pmix_event_libdir="$libeventdir_prefix"]) + + PMIX_CHECK_PACKAGE([pmix_libevent], + [event.h], + [event], + [event_config_new], + [-levent -levent_pthreads], + [$pmix_event_dir], + [$pmix_event_libdir], + [pmix_libevent_support=1], + [pmix_libevent_support=0]) + + AS_IF([test "$pmix_event_defaults" = "no"], + [PMIX_FLAGS_APPEND_UNIQ(CPPFLAGS, $pmix_libevent_CPPFLAGS) + PMIX_FLAGS_APPEND_UNIQ(LDFLAGS, $pmix_libevent_LDFLAGS)]) + PMIX_FLAGS_APPEND_UNIQ(LIBS, $pmix_libevent_LIBS) + + if test $pmix_libevent_support -eq 1; then + # Ensure that this libevent has the symbol + # "evthread_set_lock_callbacks", which will only exist if + # libevent was configured with thread support. + AC_CHECK_LIB([event], [evthread_set_lock_callbacks], + [], + [AC_MSG_WARN([External libevent does not have thread support]) + AC_MSG_WARN([PMIx requires libevent to be compiled with]) + AC_MSG_WARN([thread support enabled]) + pmix_libevent_support=0]) + fi + if test $pmix_libevent_support -eq 1; then + AC_CHECK_LIB([event_pthreads], [evthread_use_pthreads], + [], + [AC_MSG_WARN([External libevent does not have thread support]) + AC_MSG_WARN([PMIx requires libevent to be compiled with]) + AC_MSG_WARN([thread support enabled]) + pmix_libevent_support=0]) fi - AC_MSG_RESULT([(default search paths)]) fi - AS_IF([test ! 
-z "$libeventdir_prefix" && "$libeventdir_prefix" != "yes"], - [pmix_event_libdir="$libeventdir_prefix"]) - - PMIX_CHECK_PACKAGE([pmix_libevent], - [event.h], - [event], - [event_config_new], - [-levent -levent_pthreads], - [$pmix_event_dir], - [$pmix_event_libdir], - [], - [AC_MSG_WARN([LIBEVENT SUPPORT NOT FOUND]) - AC_MSG_ERROR([CANNOT CONTINUE])]) - - AS_IF([test "$pmix_event_defaults" = "no"], - [PMIX_FLAGS_APPEND_UNIQ(CPPFLAGS, $pmix_libevent_CPPFLAGS) - PMIX_FLAGS_APPEND_UNIQ(LDFLAGS, $pmix_libevent_LDFLAGS)]) - PMIX_FLAGS_APPEND_UNIQ(LIBS, $pmix_libevent_LIBS) - - # Ensure that this libevent has the symbol - # "evthread_set_lock_callbacks", which will only exist if - # libevent was configured with thread support. - AC_CHECK_LIB([event], [evthread_set_lock_callbacks], - [], - [AC_MSG_WARN([External libevent does not have thread support]) - AC_MSG_WARN([PMIx requires libevent to be compiled with]) - AC_MSG_WARN([thread support enabled]) - AC_MSG_ERROR([Cannot continue])]) - AC_CHECK_LIB([event_pthreads], [evthread_use_pthreads], - [], - [AC_MSG_WARN([External libevent does not have thread support]) - AC_MSG_WARN([PMIx requires libevent to be compiled with]) - AC_MSG_WARN([thread support enabled]) - AC_MSG_ERROR([Cannot continue])]) - - # Set output variables - PMIX_EVENT_HEADER="" - PMIX_EVENT2_THREAD_HEADER="" - pmix_libevent_source=$pmix_event_dir + + CPPFLAGS="$pmix_check_libevent_save_CPPFLAGS" + LDFLAGS="$pmix_check_libevent_save_LDFLAGS" + LIBS="$pmix_check_libevent_save_LIBS" + + AC_MSG_CHECKING([will libevent support be built]) + if test $pmix_libevent_support -eq 1; then + AC_MSG_RESULT([yes]) + # Set output variables + PMIX_EVENT_HEADER="" + PMIX_EVENT2_THREAD_HEADER="" + AC_DEFINE_UNQUOTED([PMIX_EVENT_HEADER], [$PMIX_EVENT_HEADER], + [Location of event.h]) + pmix_libevent_source=$pmix_event_dir + AS_IF([test "$pmix_event_defaults" = "no"], + [PMIX_FLAGS_APPEND_UNIQ(CPPFLAGS, $pmix_libevent_CPPFLAGS) + PMIX_FLAGS_APPEND_UNIQ(LDFLAGS, 
$pmix_libevent_LDFLAGS)]) + PMIX_FLAGS_APPEND_UNIQ(LIBS, $pmix_libevent_LIBS) + else + AC_MSG_RESULT([no]) + fi + + AC_DEFINE_UNQUOTED([PMIX_HAVE_LIBEVENT], [$pmix_libevent_support], [Whether we are building against libevent]) PMIX_VAR_SCOPE_POP ])dnl diff --git a/opal/mca/pmix/pmix3x/pmix/configure.ac b/opal/mca/pmix/pmix3x/pmix/configure.ac index 3cbd07e8914..4bab86e668f 100644 --- a/opal/mca/pmix/pmix3x/pmix/configure.ac +++ b/opal/mca/pmix/pmix3x/pmix/configure.ac @@ -201,7 +201,6 @@ AS_IF([test "$pmix_debug" = "1"], LT_INIT() LT_LANG([C]) -LT_LANG([C++]) ############################################################################ # Setup the core diff --git a/opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec b/opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec index 6a47f054807..ae488781f7e 100644 --- a/opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec +++ b/opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec @@ -192,7 +192,7 @@ Summary: An extended/exascale implementation of PMI Name: %{?_name:%{_name}}%{!?_name:pmix} -Version: 3.1.3 +Version: 3.1.4 Release: 1%{?dist} License: BSD Group: Development/Libraries diff --git a/opal/mca/pmix/pmix3x/pmix/examples/client.c b/opal/mca/pmix/pmix3x/pmix/examples/client.c index 519ef649d62..49e471fb258 100644 --- a/opal/mca/pmix/pmix3x/pmix/examples/client.c +++ b/opal/mca/pmix/pmix3x/pmix/examples/client.c @@ -254,6 +254,9 @@ int main(int argc, char **argv) fprintf(stderr, "Client ns %s rank %d: PMIx_Commit failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } + if (0 == myproc.rank) { + sleep(2); + } /* call fence to synchronize with our peers - instruct * the fence operation to collect and return all "put" diff --git a/opal/mca/pmix/pmix3x/pmix/examples/dynamic.c b/opal/mca/pmix/pmix3x/pmix/examples/dynamic.c index d7c0d3701ff..6b929420b7e 100644 --- a/opal/mca/pmix/pmix3x/pmix/examples/dynamic.c +++ b/opal/mca/pmix/pmix3x/pmix/examples/dynamic.c @@ -49,9 +49,7 @@ int main(int argc, char **argv) char nsp2[PMIX_MAX_NSLEN+1]; pmix_app_t 
*app; char hostname[1024], dir[1024]; - pmix_proc_t *peers; - size_t npeers, ntmp=0; - char *nodelist; + size_t ntmp=0; if (0 > gethostname(hostname, sizeof(hostname))) { exit(1); @@ -71,14 +69,14 @@ int main(int argc, char **argv) (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - /* get our universe size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc); + /* get our job size */ + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs); + fprintf(stderr, "Client %s:%d job size %d\n", myproc.nspace, myproc.rank, nprocs); /* call fence to sync */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); @@ -103,13 +101,6 @@ int main(int argc, char **argv) app->env = (char**)malloc(2 * sizeof(char*)); app->env[0] = strdup("PMIX_ENV_VALUE=3"); app->env[1] = NULL; - PMIX_INFO_CREATE(app->info, 2); - (void)strncpy(app->info[0].key, "DARTH", PMIX_MAX_KEYLEN); - app->info[0].value.type = PMIX_INT8; - app->info[0].value.data.int8 = 12; - (void)strncpy(app->info[1].key, "VADER", PMIX_MAX_KEYLEN); - app->info[1].value.type = PMIX_DOUBLE; - app->info[1].value.data.dval = 12.34; fprintf(stderr, "Client ns %s rank %d: calling PMIx_Spawn\n", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Spawn(NULL, 0, app, 1, nsp2))) { @@ -122,65 +113,28 @@ int main(int argc, char **argv) val = NULL; (void)strncpy(proc.nspace, nsp2, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val)) || + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 
0, &val)) || NULL == val) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc); + fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } ntmp = val->data.uint32; PMIX_VALUE_RELEASE(val); - fprintf(stderr, "Client %s:%d universe %s size %d\n", myproc.nspace, myproc.rank, nsp2, (int)ntmp); - } - - /* just cycle the connect/disconnect functions */ - (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Connect(&proc, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Connect failed: %d\n", myproc.nspace, myproc.rank, rc); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Connect succeeded\n", - myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Disconnect(&proc, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Disonnect failed: %d\n", myproc.nspace, myproc.rank, rc); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Disconnect succeeded\n", myproc.nspace, myproc.rank); + fprintf(stderr, "Client %s:%d job %s size %d\n", myproc.nspace, myproc.rank, nsp2, (int)ntmp); - /* finally, test the resolve functions */ - if (0 == myproc.rank) { - if (PMIX_SUCCESS != (rc = PMIx_Resolve_peers(hostname, NULL, &peers, &npeers))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers failed for nspace %s: %d\n", myproc.nspace, myproc.rank, nsp2, rc); - goto done; - } - if ((nprocs+ntmp) != npeers) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers returned incorrect npeers: %d vs %d\n", myproc.nspace, myproc.rank, (int)(nprocs+ntmp), (int)npeers); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers returned %d npeers\n", myproc.nspace, myproc.rank, (int)npeers); - if (PMIX_SUCCESS != (rc = PMIx_Resolve_nodes(nsp2, &nodelist))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_nodes 
failed for nspace %s: %d\n", myproc.nspace, myproc.rank, nsp2, rc); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_nodes %s", myproc.nspace, myproc.rank, nodelist); - } else { - if (PMIX_SUCCESS != (rc = PMIx_Resolve_peers(hostname, myproc.nspace, &peers, &npeers))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers failed for nspace %s: %d\n", myproc.nspace, myproc.rank, myproc.nspace, rc); - goto done; - } - if (nprocs != npeers) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers returned incorrect npeers: %d vs %d\n", myproc.nspace, myproc.rank, nprocs, (int)npeers); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers returned %d npeers\n", myproc.nspace, myproc.rank, (int)npeers); - if (PMIX_SUCCESS != (rc = PMIx_Resolve_nodes(myproc.nspace, &nodelist))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_nodes failed: %d\n", myproc.nspace, myproc.rank, rc); + /* get a proc-specific value */ + val = NULL; + (void)strncpy(proc.nspace, nsp2, PMIX_MAX_NSLEN); + proc.rank = 1; + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_RANK, NULL, 0, &val)) || + NULL == val) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get local rank failed: %d\n", myproc.nspace, myproc.rank, rc); goto done; } - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_nodes %s\n", myproc.nspace, myproc.rank, nodelist); + ntmp = (int)val->data.uint16; + PMIX_VALUE_RELEASE(val); + fprintf(stderr, "Client %s:%d job %s local rank %d\n", myproc.nspace, myproc.rank, nsp2, (int)ntmp); } - PMIX_PROC_FREE(peers, npeers); - free(nodelist); done: /* call fence to sync */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmi1.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmi1.c index dd5cb66e7d9..56774e4fb24 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmi1.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmi1.c @@ -1,8 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, 
Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ @@ -85,7 +85,7 @@ PMIX_EXPORT int PMI_Init(int *spawned) /* getting internal key requires special rank value */ memcpy(&proc, &myproc, sizeof(myproc)); - proc.rank = PMIX_RANK_UNDEF; + proc.rank = PMIX_RANK_WILDCARD; /* set controlling parameters * PMIX_OPTIONAL - expect that these keys should be available on startup @@ -394,8 +394,6 @@ PMIX_EXPORT int PMI_Get_appnum(int *appnum) pmix_value_t *val; pmix_info_t info[1]; bool val_optinal = 1; - pmix_proc_t proc = myproc; - proc.rank = PMIX_RANK_WILDCARD; PMI_CHECK(); @@ -414,11 +412,11 @@ PMIX_EXPORT int PMI_Get_appnum(int *appnum) PMIX_INFO_CONSTRUCT(&info[0]); PMIX_INFO_LOAD(&info[0], PMIX_OPTIONAL, &val_optinal, PMIX_BOOL); - rc = PMIx_Get(&proc, PMIX_APPNUM, info, 1, &val); + rc = PMIx_Get(&myproc, PMIX_APPNUM, info, 1, &val); if (PMIX_SUCCESS == rc) { rc = convert_int(appnum, val); PMIX_VALUE_RELEASE(val); - } else if( PMIX_ERR_NOT_FOUND == rc ){ + } else { /* this is optional value, set to 0 */ *appnum = 0; rc = PMIX_SUCCESS; @@ -445,7 +443,7 @@ PMIX_EXPORT int PMI_Publish_name(const char service_name[], const char port[]) } /* pass the service/port */ - pmix_strncpy(info.key, service_name, PMIX_MAX_KEYLEN); + pmix_strncpy(info.key, service_name, PMIX_MAX_KEYLEN); info.value.type = PMIX_STRING; info.value.data.string = (char*) port; @@ -497,7 +495,7 @@ PMIX_EXPORT int PMI_Lookup_name(const char service_name[], char port[]) PMIX_PDATA_CONSTRUCT(&pdata); /* pass the service */ - pmix_strncpy(pdata.key, service_name, PMIX_MAX_KEYLEN); + pmix_strncpy(pdata.key, service_name, PMIX_MAX_KEYLEN); /* PMI-1 doesn't 
want the nspace back */ if (PMIX_SUCCESS != (rc = PMIx_Lookup(&pdata, 1, NULL, 0))) { @@ -514,7 +512,7 @@ PMIX_EXPORT int PMI_Lookup_name(const char service_name[], char port[]) * potential we could overrun it. As this feature * isn't widely supported in PMI-1, try being * conservative */ - pmix_strncpy(port, pdata.value.data.string, PMIX_MAX_KEYLEN); + pmix_strncpy(port, pdata.value.data.string, PMIX_MAX_KEYLEN); PMIX_PDATA_DESTRUCT(&pdata); return PMIX_SUCCESS; diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c index a99f7141922..8d522e7a4fa 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . @@ -50,7 +50,9 @@ #include #endif #include PMIX_EVENT_HEADER +#if ! PMIX_HAVE_LIBEV #include PMIX_EVENT2_THREAD_HEADER +#endif static const char pmix_version_string[] = PMIX_VERSION; static pmix_status_t pmix_init_result = PMIX_ERR_INIT; diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_fence.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_fence.c index 7a587dcad46..adac9bbf4d3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_fence.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_fence.c @@ -1,8 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . * All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. @@ -73,7 +73,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs, PMIX_ACQUIRE_THREAD(&pmix_global_lock); - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "pmix: executing fence"); if (pmix_globals.init_cntr <= 0) { @@ -106,7 +106,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs, rc = cb->status; PMIX_RELEASE(cb); - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "pmix: fence released"); return rc; @@ -125,7 +125,7 @@ PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs PMIX_ACQUIRE_THREAD(&pmix_global_lock); - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "pmix: fence_nb called"); if (pmix_globals.init_cntr <= 0) { @@ -185,7 +185,7 @@ static pmix_status_t unpack_return(pmix_buffer_t *data) pmix_status_t ret; int32_t cnt; - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "client:unpack fence called"); /* unpack the status code */ @@ -196,7 +196,7 @@ static pmix_status_t unpack_return(pmix_buffer_t *data) PMIX_ERROR_LOG(rc); return rc; } - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "client:unpack fence received status %d", ret); return ret; } @@ -255,7 +255,7 @@ static void wait_cbfunc(struct pmix_peer_t *pr, pmix_ptl_hdr_t *hdr, pmix_cb_t *cb = (pmix_cb_t*)cbdata; pmix_status_t rc; - pmix_output_verbose(2, pmix_globals.debug_output, + pmix_output_verbose(2, pmix_client_globals.fence_output, "pmix: fence_nb callback recvd"); if (NULL == 
cb) { diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_get.c index 46d6c62db8f..48cee715afe 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_get.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Artem Y. Polyakov . @@ -57,6 +57,7 @@ #include "src/util/compress.h" #include "src/util/error.h" #include "src/util/hash.h" +#include "src/util/name_fns.h" #include "src/util/output.h" #include "src/mca/gds/gds.h" #include "src/mca/ptl/ptl.h" @@ -99,14 +100,15 @@ PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, PMIX_RELEASE_THREAD(&pmix_global_lock); pmix_output_verbose(2, pmix_client_globals.get_output, - "pmix:client get for %s:%d key %s", - (NULL == proc) ? "NULL" : proc->nspace, - (NULL == proc) ? PMIX_RANK_UNDEF : proc->rank, + "pmix:client get for %s key %s", + (NULL == proc) ? "NULL" : PMIX_NAME_PRINT(proc), (NULL == key) ? 
"NULL" : key); /* try to get data directly, without threadshift */ - if (PMIX_SUCCESS == (rc = _getfn_fastpath(proc, key, info, ninfo, val))) { - goto done; + if (PMIX_RANK_UNDEF != proc->rank && NULL != key) { + if (PMIX_SUCCESS == (rc = _getfn_fastpath(proc, key, info, ninfo, val))) { + goto done; + } } /* create a callback object as we need to pass it to the @@ -329,9 +331,14 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, } if (PMIX_SUCCESS != ret) { + PMIX_ERROR_LOG(ret); goto done; } - PMIX_GDS_ACCEPT_KVS_RESP(rc, pmix_client_globals.myserver, buf); + if (PMIX_RANK_UNDEF == proc.rank) { + PMIX_GDS_ACCEPT_KVS_RESP(rc, pmix_globals.mypeer, buf); + } else { + PMIX_GDS_ACCEPT_KVS_RESP(rc, pmix_client_globals.myserver, buf); + } if (PMIX_SUCCESS != rc) { goto done; } @@ -350,7 +357,11 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, /* fetch the data from server peer module - since it is passing * it back to the user, we need a copy of it */ cb->copy = true; - PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + if (PMIX_RANK_UNDEF == proc.rank) { + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, cb); + } else { + PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + } if (PMIX_SUCCESS == rc) { if (1 != pmix_list_get_size(&cb->kvs)) { rc = PMIX_ERR_INVALID_VAL; @@ -496,9 +507,15 @@ static pmix_status_t _getfn_fastpath(const pmix_proc_t *proc, const pmix_key_t k /* scan the incoming directives */ if (NULL != info) { for (n=0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_DATA_SCOPE, PMIX_MAX_KEYLEN)) { + if (PMIX_CHECK_KEY(&info[n], PMIX_DATA_SCOPE)) { cb->scope = info[n].value.data.scope; - break; + } else if (PMIX_CHECK_KEY(&info[n], PMIX_OPTIONAL) || + PMIX_CHECK_KEY(&info[n], PMIX_IMMEDIATE)) { + continue; + } else { + /* we cannot handle any other directives via this path */ + PMIX_RELEASE(cb); + return PMIX_ERR_NOT_SUPPORTED; } } } @@ -508,16 +525,16 @@ static pmix_status_t _getfn_fastpath(const pmix_proc_t *proc, const pmix_key_t k 
cb->info = (pmix_info_t*)info; cb->ninfo = ninfo; - PMIX_GDS_FETCH_IS_TSAFE(rc, pmix_globals.mypeer); + PMIX_GDS_FETCH_IS_TSAFE(rc, pmix_client_globals.myserver); if (PMIX_SUCCESS == rc) { - PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, cb); + PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); if (PMIX_SUCCESS == rc) { goto done; } } - PMIX_GDS_FETCH_IS_TSAFE(rc, pmix_client_globals.myserver); + PMIX_GDS_FETCH_IS_TSAFE(rc, pmix_globals.mypeer); if (PMIX_SUCCESS == rc) { - PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, cb); if (PMIX_SUCCESS == rc) { goto done; } @@ -551,23 +568,23 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* cb was passed to us from another thread - acquire it */ PMIX_ACQUIRE_OBJECT(cb); - pmix_output_verbose(2, pmix_client_globals.get_output, - "pmix: getnbfn value for proc %s:%u key %s", - cb->pname.nspace, cb->pname.rank, - (NULL == cb->key) ? "NULL" : cb->key); - /* set the proc object identifier */ pmix_strncpy(proc.nspace, cb->pname.nspace, PMIX_MAX_NSLEN); proc.rank = cb->pname.rank; + pmix_output_verbose(2, pmix_client_globals.get_output, + "pmix: getnbfn value for proc %s key %s", + PMIX_NAME_PRINT(&proc), + (NULL == cb->key) ? 
"NULL" : cb->key); + /* scan the incoming directives */ if (NULL != cb->info) { for (n=0; n < cb->ninfo; n++) { - if (0 == strncmp(cb->info[n].key, PMIX_OPTIONAL, PMIX_MAX_KEYLEN)) { + if (PMIX_CHECK_KEY(&cb->info[n], PMIX_OPTIONAL)) { optional = PMIX_INFO_TRUE(&cb->info[n]); - } else if (0 == strncmp(cb->info[n].key, PMIX_IMMEDIATE, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_IMMEDIATE)) { immediate = PMIX_INFO_TRUE(&cb->info[n]); - } else if (0 == strncmp(cb->info[n].key, PMIX_TIMEOUT, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_TIMEOUT)) { /* set a timer to kick us out if we don't * have an answer within their window */ if (0 < cb->info[n].value.data.integer) { @@ -578,8 +595,16 @@ static void _getnbfn(int fd, short flags, void *cbdata) pmix_event_evtimer_add(&cb->ev, &tv); cb->timer_running = true; } - } else if (0 == strncmp(cb->info[n].key, PMIX_DATA_SCOPE, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_DATA_SCOPE)) { cb->scope = cb->info[n].value.data.scope; + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_SESSION_INFO)) { + cb->level = PMIX_LEVEL_SESSION; + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_JOB_INFO)) { + cb->level = PMIX_LEVEL_JOB; + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_APP_INFO)) { + cb->level = PMIX_LEVEL_APP; + } else if (PMIX_CHECK_KEY(&cb->info[n], PMIX_NODE_INFO)) { + cb->level = PMIX_LEVEL_NODE; } } } @@ -604,7 +629,13 @@ static void _getnbfn(int fd, short flags, void *cbdata) /* fetch the data from my server's module - since we are passing * it back to the user, we need a copy of it */ cb->copy = true; - PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + /* if the peer and server GDS component are the same, then no + * point in trying it again */ + if (0 != strcmp(pmix_globals.mypeer->nptr->compat.gds->name, pmix_client_globals.myserver->nptr->compat.gds->name)) { + PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); + } else { + rc = 
PMIX_ERR_TAKE_NEXT_OPTION; + } if (PMIX_SUCCESS != rc) { pmix_output_verbose(5, pmix_client_globals.get_output, "pmix:client job-level data NOT found"); @@ -653,7 +684,17 @@ static void _getnbfn(int fd, short flags, void *cbdata) "pmix:client job-level data NOT found"); rc = process_values(&val, cb); goto respond; + } else if (PMIX_RANK_UNDEF == proc.rank) { + /* the data would have to be stored on our own peer, so + * we need to go request it */ + goto request; } else { + /* if the peer and server GDS component are the same, then no + * point in trying it again */ + if (0 == strcmp(pmix_globals.mypeer->nptr->compat.gds->name, pmix_client_globals.myserver->nptr->compat.gds->name)) { + val = NULL; + goto request; + } cb->proc = &proc; cb->copy = true; PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb); @@ -722,9 +763,9 @@ static void _getnbfn(int fd, short flags, void *cbdata) } pmix_output_verbose(2, pmix_client_globals.get_output, - "%s:%d REQUESTING DATA FROM SERVER FOR %s:%d KEY %s", - pmix_globals.myid.nspace, pmix_globals.myid.rank, - cb->pname.nspace, cb->pname.rank, cb->key); + "%s REQUESTING DATA FROM SERVER FOR %s KEY %s", + PMIX_NAME_PRINT(&pmix_globals.myid), + PMIX_NAME_PRINT(cb->proc), cb->key); /* track the callback object */ pmix_list_append(&pmix_client_globals.pending_requests, &cb->super); diff --git a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_log.c b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_log.c index 582a64e4eaa..0c5aa760fc7 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_log.c +++ b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_log.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
@@ -82,14 +82,18 @@ PMIX_EXPORT pmix_status_t PMIx_Log(const pmix_info_t data[], size_t ndata, * recv routine so we know which callback to use when * the return message is recvd */ PMIX_CONSTRUCT(&cb, pmix_cb_t); - if (PMIX_SUCCESS != (rc = PMIx_Log_nb(data, ndata, directives, - ndirs, opcbfunc, &cb))) { + rc = PMIx_Log_nb(data, ndata, directives, ndirs, opcbfunc, &cb); + if (PMIX_SUCCESS == rc) { + /* wait for the operation to complete */ + PMIX_WAIT_THREAD(&cb.lock); + } else { PMIX_DESTRUCT(&cb); + if (PMIX_OPERATION_SUCCEEDED == rc) { + rc = PMIX_SUCCESS; + } return rc; } - /* wait for the operation to complete */ - PMIX_WAIT_THREAD(&cb.lock); rc = cb.status; PMIX_DESTRUCT(&cb); diff --git a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_query.c b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_query.c index c40f08b6a0e..1f217d18a5d 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/common/pmix_query.c +++ b/opal/mca/pmix/pmix3x/pmix/src/common/pmix_query.c @@ -144,6 +144,7 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque pmix_list_t results; pmix_kval_t *kv, *kvnxt; pmix_proc_t proc; + bool rank_given; PMIX_ACQUIRE_THREAD(&pmix_global_lock); @@ -190,6 +191,7 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque memset(proc.nspace, 0, PMIX_MAX_NSLEN+1); proc.rank = PMIX_RANK_INVALID; for (n=0; n < nqueries; n++) { + rank_given = false; for (p=0; p < queries[n].nqual; p++) { if (PMIX_CHECK_KEY(&queries[n].qualifiers[p], PMIX_QUERY_REFRESH_CACHE)) { if (PMIX_INFO_TRUE(&queries[n].qualifiers[p])) { @@ -199,37 +201,41 @@ PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nque } else if (PMIX_CHECK_KEY(&queries[n].qualifiers[p], PMIX_PROCID)) { PMIX_LOAD_NSPACE(proc.nspace, queries[n].qualifiers[p].value.data.proc->nspace); proc.rank = queries[n].qualifiers[p].value.data.proc->rank; + rank_given = true; } else if (PMIX_CHECK_KEY(&queries[n].qualifiers[p], PMIX_NSPACE)) { 
PMIX_LOAD_NSPACE(proc.nspace, queries[n].qualifiers[p].value.data.string); } else if (PMIX_CHECK_KEY(&queries[n].qualifiers[p], PMIX_RANK)) { proc.rank = queries[n].qualifiers[p].value.data.rank; - } else if (PMIX_CHECK_KEY(&queries[n].qualifiers[p], PMIX_HOSTNAME)) { - if (0 != strcmp(queries[n].qualifiers[p].value.data.string, pmix_globals.hostname)) { - /* asking about a different host, so ask for the info */ - PMIX_LIST_DESTRUCT(&results); - goto query; - } + rank_given = true; } } /* we get here if a refresh isn't required - first try a local * "get" on the data to see if we already have it */ PMIX_CONSTRUCT(&cb, pmix_cb_t); cb.copy = false; - /* set the proc */ - if (PMIX_RANK_INVALID == proc.rank && - 0 == strlen(proc.nspace)) { - /* use our id */ - cb.proc = &pmix_globals.myid; + /* if they are querying about node or app values not directly + * associated with a proc (i.e., they didn't specify the proc), + * then we obtain those by leaving the proc info as undefined */ + if (!rank_given) { + proc.rank = PMIX_RANK_UNDEF; + cb.proc = &proc; } else { - if (0 == strlen(proc.nspace)) { - /* use our nspace */ - PMIX_LOAD_NSPACE(cb.proc->nspace, pmix_globals.myid.nspace); - } - if (PMIX_RANK_INVALID == proc.rank) { - /* user the wildcard rank */ - proc.rank = PMIX_RANK_WILDCARD; + /* set the proc */ + if (PMIX_RANK_INVALID == proc.rank && + 0 == strlen(proc.nspace)) { + /* use our id */ + cb.proc = &pmix_globals.myid; + } else { + if (0 == strlen(proc.nspace)) { + /* use our nspace */ + PMIX_LOAD_NSPACE(cb.proc->nspace, pmix_globals.myid.nspace); + } + if (PMIX_RANK_INVALID == proc.rank) { + /* user the wildcard rank */ + proc.rank = PMIX_RANK_WILDCARD; + } + cb.proc = &proc; } - cb.proc = &proc; } for (p=0; NULL != queries[n].keys[p]; p++) { cb.key = queries[n].keys[p]; diff --git a/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_notification.c b/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_notification.c index 574607ec4b0..c667489394c 100644 --- 
a/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_notification.c +++ b/opal/mca/pmix/pmix3x/pmix/src/event/pmix_event_notification.c @@ -883,7 +883,7 @@ static void _notify_client_event(int sd, short args, void *cbdata) } else { /* look up the nspace for this proc */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (PMIX_CHECK_NSPACE(tmp->nspace, cd->targets[n].nspace)) { nptr = tmp; break; diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c index 8959ba56845..df8a6b6e5b4 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c +++ b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.c @@ -306,6 +306,7 @@ static void cbcon(pmix_cb_t *p) PMIX_CONSTRUCT(&p->kvs, pmix_list_t); p->copy = false; p->timer_running = false; + p->level = PMIX_LEVEL_UNDEF; } static void cbdes(pmix_cb_t *p) { @@ -342,6 +343,7 @@ static void qcon(pmix_query_caddy_t *p) p->relcbfunc = NULL; p->credcbfunc = NULL; p->validcbfunc = NULL; + PMIX_CONSTRUCT(&p->results, pmix_list_t); } static void qdes(pmix_query_caddy_t *p) { @@ -349,6 +351,8 @@ static void qdes(pmix_query_caddy_t *p) PMIX_BYTE_OBJECT_DESTRUCT(&p->bo); PMIX_PROC_FREE(p->targets, p->ntargets); PMIX_INFO_FREE(p->info, p->ninfo); + PMIX_LIST_DESTRUCT(&p->results); + PMIX_QUERY_FREE(p->queries, p->nqueries); } PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_query_caddy_t, pmix_object_t, @@ -566,3 +570,30 @@ static bool dirpath_is_empty(const char *path ) return true; } + +int pmix_event_assign(struct event *ev, pmix_event_base_t *evbase, + int fd, short arg, event_callback_fn cbfn, void *cbd) +{ +#if PMIX_HAVE_LIBEV + event_set(ev, fd, arg, cbfn, cbd); + event_base_set(evbase, ev); +#else + event_assign(ev, evbase, fd, arg, cbfn, cbd); +#endif + return 0; +} + +pmix_event_t* pmix_event_new(pmix_event_base_t *b, int fd, + short fg, event_callback_fn cbfn, void 
*cbd) +{ + pmix_event_t *ev = NULL; + +#if PMIX_HAVE_LIBEV + ev = (pmix_event_t*)calloc(1, sizeof(pmix_event_t)); + ev->ev_base = b; +#else + ev = event_new(b, fd, fg, (event_callback_fn) cbfn, cbd); +#endif + + return ev; +} diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h index 202679cc4ee..113cd48faab 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h +++ b/opal/mca/pmix/pmix3x/pmix/src/include/pmix_globals.h @@ -122,6 +122,16 @@ typedef enum { PMIX_COLLECT_MAX } pmix_collect_t; +/* define a set of flags indicating the level + * of information being stored/requested */ +typedef enum { + PMIX_LEVEL_UNDEF, + PMIX_LEVEL_SESSION, + PMIX_LEVEL_JOB, + PMIX_LEVEL_APP, + PMIX_LEVEL_NODE +} pmix_level_t; + /**** PEER STRUCTURES ****/ /* clients can only talk to their server, and servers are @@ -268,6 +278,7 @@ typedef struct { pmix_info_t *info; size_t ninfo; pmix_byte_object_t bo; + pmix_list_t results; pmix_info_cbfunc_t cbfunc; pmix_value_cbfunc_t valcbfunc; pmix_release_cbfunc_t relcbfunc; @@ -388,6 +399,7 @@ typedef struct { pmix_list_t kvs; bool copy; bool timer_running; + pmix_level_t level; } pmix_cb_t; PMIX_CLASS_DECLARATION(pmix_cb_t); @@ -481,6 +493,7 @@ typedef struct { bool xml_output; bool timestamp_output; size_t output_limit; + pmix_list_t nspaces; } pmix_globals_t; /* provide access to a function to cleanup epilogs */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/include/types.h b/opal/mca/pmix/pmix3x/pmix/src/include/types.h index cf8d082c34d..6b52843ee48 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/include/types.h +++ b/opal/mca/pmix/pmix3x/pmix/src/include/types.h @@ -9,9 +9,11 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
* Copyright (c) 2015 Mellanox Technologies, Inc. * All rights reserved. + * Copyright (c) 2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -43,11 +45,17 @@ #include #endif #include PMIX_EVENT_HEADER +#if ! PMIX_HAVE_LIBEV +#include PMIX_EVENT2_THREAD_HEADER +#endif #if PMIX_ENABLE_DEBUG #include "src/util/output.h" #endif +#include +#include + /* * portable assignment of pointer to int @@ -137,6 +145,18 @@ static inline uint64_t pmix_ntoh64(uint64_t val) #endif } +/* Convert size_t value from host to network byte order and back */ +#if SIZEOF_SIZE_T == 4 + +#define pmix_htonsizet(x) htonl(x) +#define pmix_ntohsizet(x) ntohl(x) + +#elif SIZEOF_SIZE_T == 8 + +#define pmix_htonsizet(x) pmix_hton64(x) +#define pmix_ntohsizet(x) pmix_ntoh64(x) + +#endif /** * Convert between a local representation of pointer and a 64 bits value. @@ -225,6 +245,8 @@ static inline uint64_t pmix_swap_bytes8(uint64_t val) #define PMIX_EVLOOP_ONCE EVLOOP_ONCE /**< Block at most once. */ #define PMIX_EVLOOP_NONBLOCK EVLOOP_NONBLOCK /**< Do not block. 
*/ +#define PMIX_EVENT_SIGNAL(ev) pmix_event_get_signal(ev) + typedef struct event_base pmix_event_base_t; typedef struct event pmix_event_t; @@ -232,42 +254,52 @@ typedef struct event pmix_event_t; #define pmix_event_base_free(b) event_base_free(b) -#define pmix_event_free(x) event_free(x) -#define pmix_event_base_loopbreak(b) event_base_loopbreak(b) - -#define pmix_event_base_loopexit(b) event_base_loopexit(b, NULL) +#if PMIX_HAVE_LIBEV +#define pmix_event_use_threads() +#define pmix_event_free(b) free(b) +#define pmix_event_get_signal(x) (x)->ev_fd +#else /* thread support APIs */ #define pmix_event_use_threads() evthread_use_pthreads() +#define pmix_event_free(x) event_free(x) +#define pmix_event_get_signal(x) event_get_signal(x) +#endif /* Basic event APIs */ #define pmix_event_enable_debug_mode() event_enable_debug_mode() -#define pmix_event_assign(x, b, fd, fg, cb, arg) event_assign((x), (b), (fd), (fg), (event_callback_fn) (cb), (arg)) +PMIX_EXPORT int pmix_event_assign(struct event *ev, pmix_event_base_t *evbase, + int fd, short arg, event_callback_fn cbfn, void *cbd); -#define pmix_event_set(b, x, fd, fg, cb, arg) event_assign((x), (b), (fd), (fg), (event_callback_fn) (cb), (arg)) +#define pmix_event_set(b, x, fd, fg, cb, arg) pmix_event_assign((x), (b), (fd), (fg), (event_callback_fn) (cb), (arg)) +#if PMIX_HAVE_LIBEV +PMIX_EXPORT int pmix_event_add(struct event *ev, struct timeval *tv); +PMIX_EXPORT int pmix_event_del(struct event *ev); +PMIX_EXPORT void pmix_event_active (struct event *ev, int res, short ncalls); +PMIX_EXPORT void pmix_event_base_loopexit (pmix_event_base_t *b); +#else #define pmix_event_add(ev, tv) event_add((ev), (tv)) - #define pmix_event_del(ev) event_del((ev)) - #define pmix_event_active(x, y, z) event_active((x), (y), (z)) +#define pmix_event_base_loopexit(b) event_base_loopexit(b, NULL) +#endif -#define pmix_event_new(b, fd, fg, cb, arg) event_new((b), (fd), (fg), (event_callback_fn) (cb), (arg)) +PMIX_EXPORT pmix_event_t* 
pmix_event_new(pmix_event_base_t *b, int fd, + short fg, event_callback_fn cbfn, void *cbd); #define pmix_event_loop(b, fg) event_base_loop((b), (fg)) -#define pmix_event_active(x, y, z) event_active((x), (y), (z)) - #define pmix_event_evtimer_new(b, cb, arg) pmix_event_new((b), -1, 0, (cb), (arg)) #define pmix_event_evtimer_add(x, tv) pmix_event_add((x), (tv)) -#define pmix_event_evtimer_set(b, x, cb, arg) event_assign((x), (b), -1, 0, (event_callback_fn) (cb), (arg)) +#define pmix_event_evtimer_set(b, x, cb, arg) pmix_event_assign((x), (b), -1, 0, (event_callback_fn) (cb), (arg)) #define pmix_event_evtimer_del(x) pmix_event_del((x)) -#define pmix_event_signal_set(b, x, fd, cb, arg) event_assign((x), (b), (fd), EV_SIGNAL|EV_PERSIST, (event_callback_fn) (cb), (arg)) +#define pmix_event_signal_set(b, x, fd, cb, arg) pmix_event_assign((x), (b), (fd), EV_SIGNAL|EV_PERSIST, (event_callback_fn) (cb), (arg)) #endif /* PMIX_TYPES_H */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_component_find.c b/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_component_find.c index fed38f988fd..7d96e21c36a 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_component_find.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_component_find.c @@ -16,7 +16,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -339,8 +339,8 @@ static int component_find_check (pmix_mca_base_framework_t *framework, char **re } if (!found) { - char h[MAXHOSTNAMELEN]; - gethostname(h, sizeof(h)); + char h[PMIX_MAXHOSTNAMELEN] = {0}; + gethostname(h, sizeof(h)-1); pmix_show_help("help-pmix-mca-base.txt", "find-available:not-valid", true, h, framework->framework_name, requested_component_names[i]); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_open.c b/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_open.c index f152f2c2a95..fbb55dcb355 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_open.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/base/pmix_mca_base_open.c @@ -13,7 +13,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -68,7 +68,7 @@ int pmix_mca_base_open(void) { char *value; pmix_output_stream_t lds; - char hostname[64]; + char hostname[PMIX_MAXHOSTNAMELEN] = {0}; int var_id; int rc; @@ -155,7 +155,7 @@ int pmix_mca_base_open(void) } else { set_defaults(&lds); } - gethostname(hostname, 64); + gethostname(hostname, PMIX_MAXHOSTNAMELEN-1); rc = asprintf(&lds.lds_prefix, "[%s:%05d] ", hostname, getpid()); if (0 > rc) { return PMIX_ERR_OUT_OF_RESOURCE; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/copy.c b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/copy.c index 06720cf32ec..fbdbae3efff 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/copy.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/bfrops/v20/copy.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. @@ -431,10 +431,15 @@ pmix_status_t pmix20_bfrop_value_xfer(pmix_value_t *p, const pmix_value_t *src) memcpy(&p->data.status, &src->data.status, sizeof(pmix_status_t)); break; case PMIX_PROC: - memcpy(&p->data.proc, &src->data.proc, sizeof(pmix_proc_t)); + /* create the storage */ + p->data.proc = (pmix_proc_t*)malloc(sizeof(pmix_proc_t)); + if (NULL == p->data.proc) { + return PMIX_ERR_NOMEM; + } + memcpy(p->data.proc, src->data.proc, sizeof(pmix_proc_t)); break; case PMIX_PROC_RANK: - memcpy(&p->data.proc, &src->data.rank, sizeof(pmix_rank_t)); + memcpy(&p->data.rank, &src->data.rank, sizeof(pmix_rank_t)); break; case PMIX_BYTE_OBJECT: case PMIX_COMPRESSED_STRING: @@ -653,7 +658,12 @@ pmix_status_t pmix20_bfrop_value_xfer(pmix_value_t *p, const pmix_value_t *src) p1 = (pmix_info_t*)p->data.darray->array; s1 = (pmix_info_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { - PMIX_INFO_LOAD(&p1[n], s1[n].key, &s1[n].value.data.flag, s1[n].value.type); + PMIX_LOAD_KEY(p1[n].key, s1[n].key); + rc = pmix_value_xfer(&p1[n].value, &s1[n].value); + if (PMIX_SUCCESS != rc) { + PMIX_INFO_FREE(p1, src->data.darray->size); + return rc; + } } break; case PMIX_PDATA: @@ -664,7 +674,13 @@ pmix_status_t pmix20_bfrop_value_xfer(pmix_value_t *p, const pmix_value_t *src) pd = (pmix_pdata_t*)p->data.darray->array; sd = (pmix_pdata_t*)src->data.darray->array; for (n=0; n < src->data.darray->size; n++) { - PMIX_PDATA_LOAD(&pd[n], &sd[n].proc, sd[n].key, &sd[n].value.data.flag, sd[n].value.type); + memcpy(&pd[n].proc, &sd[n].proc, sizeof(pmix_proc_t)); + PMIX_LOAD_KEY(pd[n].key, sd[n].key); + rc = pmix_value_xfer(&pd[n].value, &sd[n].value); + if (PMIX_SUCCESS != rc) { + PMIX_INFO_FREE(pd, src->data.darray->size); + return rc; + } } 
break; case PMIX_BUFFER: diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_segment.c b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_segment.c index a219bed9c0b..69ec1ba577f 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_segment.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/common/dstore/dstore_segment.c @@ -1,9 +1,9 @@ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * Copyright (c) 2016-2017 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2018 Research Organization for Information Science + * Copyright (c) 2018-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * * $COPYRIGHT$ @@ -30,6 +30,11 @@ #ifdef HAVE_SYS_AUXV_H #include +#if PMIX_HAVE_LIBEV +/* EV_NONE is macro-defined in that is included by + * and used in an enum in from libev, so #undef it to fix an issue*/ +#undef EV_NONE +#endif #endif #include diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c index 4e092fc5a68..5e6a5341bd2 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/gds/hash/gds_hash.c @@ -38,11 +38,12 @@ #include "src/server/pmix_server_ops.h" #include "src/util/argv.h" #include "src/util/compress.h" +#include "src/mca/preg/preg.h" #include "src/util/error.h" #include "src/util/hash.h" #include "src/util/output.h" +#include "src/util/name_fns.h" #include "src/util/pmix_environ.h" -#include "src/mca/preg/preg.h" #include "src/mca/gds/base/base.h" #include "gds_hash.h" @@ -115,6 +116,24 @@ pmix_gds_base_module_t pmix_hash_module = { .accept_kvs_resp = accept_kvs_resp }; +/* Define a bitmask to track what information may not have + * been provided but is computable from other info */ +#define PMIX_HASH_PROC_DATA 0x00000001 
+#define PMIX_HASH_JOB_SIZE 0x00000002 +#define PMIX_HASH_MAX_PROCS 0x00000004 +#define PMIX_HASH_NUM_NODES 0x00000008 +#define PMIX_HASH_PROC_MAP 0x00000010 +#define PMIX_HASH_NODE_MAP 0x00000020 + +/**********************************************/ +/* struct definitions */ +typedef struct { + pmix_list_item_t super; + uint32_t session; + pmix_list_t sessioninfo; + pmix_list_t nodeinfo; +} pmix_session_t; + typedef struct { pmix_list_item_t super; char *ns; @@ -123,12 +142,49 @@ typedef struct { pmix_hash_table_t remote; pmix_hash_table_t local; bool gdata_added; -} pmix_hash_trkr_t; + pmix_list_t jobinfo; + pmix_list_t apps; + pmix_list_t nodeinfo; + pmix_session_t *session; +} pmix_job_t; -static void htcon(pmix_hash_trkr_t *p) +typedef struct { + pmix_list_item_t super; + uint32_t appnum; + pmix_list_t appinfo; + pmix_list_t nodeinfo; + pmix_job_t *job; +} pmix_apptrkr_t; + +typedef struct { + pmix_list_item_t super; + uint32_t nodeid; + char *hostname; + pmix_list_t info; +} pmix_nodeinfo_t; + +/**********************************************/ +/* class instantiations */ +static void scon(pmix_session_t *s) +{ + s->session = UINT32_MAX; + PMIX_CONSTRUCT(&s->sessioninfo, pmix_list_t); + PMIX_CONSTRUCT(&s->nodeinfo, pmix_list_t); +} +static void sdes(pmix_session_t *s) +{ + PMIX_LIST_DESTRUCT(&s->sessioninfo); + PMIX_LIST_DESTRUCT(&s->nodeinfo); +} +static PMIX_CLASS_INSTANCE(pmix_session_t, + pmix_list_item_t, + scon, sdes); + +static void htcon(pmix_job_t *p) { p->ns = NULL; p->nptr = NULL; + PMIX_CONSTRUCT(&p->jobinfo, pmix_list_t); PMIX_CONSTRUCT(&p->internal, pmix_hash_table_t); pmix_hash_table_init(&p->internal, 256); PMIX_CONSTRUCT(&p->remote, pmix_hash_table_t); @@ -136,8 +192,11 @@ static void htcon(pmix_hash_trkr_t *p) PMIX_CONSTRUCT(&p->local, pmix_hash_table_t); pmix_hash_table_init(&p->local, 256); p->gdata_added = false; + PMIX_CONSTRUCT(&p->apps, pmix_list_t); + PMIX_CONSTRUCT(&p->nodeinfo, pmix_list_t); + p->session = NULL; } -static void 
htdes(pmix_hash_trkr_t *p) +static void htdes(pmix_job_t *p) { if (NULL != p->ns) { free(p->ns); @@ -145,25 +204,411 @@ static void htdes(pmix_hash_trkr_t *p) if (NULL != p->nptr) { PMIX_RELEASE(p->nptr); } + PMIX_LIST_DESTRUCT(&p->jobinfo); pmix_hash_remove_data(&p->internal, PMIX_RANK_WILDCARD, NULL); PMIX_DESTRUCT(&p->internal); pmix_hash_remove_data(&p->remote, PMIX_RANK_WILDCARD, NULL); PMIX_DESTRUCT(&p->remote); pmix_hash_remove_data(&p->local, PMIX_RANK_WILDCARD, NULL); PMIX_DESTRUCT(&p->local); + PMIX_LIST_DESTRUCT(&p->apps); + PMIX_LIST_DESTRUCT(&p->nodeinfo); + if (NULL != p->session) { + PMIX_RELEASE(p->session); + } } -static PMIX_CLASS_INSTANCE(pmix_hash_trkr_t, +static PMIX_CLASS_INSTANCE(pmix_job_t, pmix_list_item_t, htcon, htdes); -static pmix_list_t myhashes; +static void apcon(pmix_apptrkr_t *p) +{ + p->appnum = 0; + PMIX_CONSTRUCT(&p->appinfo, pmix_list_t); + PMIX_CONSTRUCT(&p->nodeinfo, pmix_list_t); + p->job = NULL; +} +static void apdes(pmix_apptrkr_t *p) +{ + PMIX_LIST_DESTRUCT(&p->appinfo); + PMIX_LIST_DESTRUCT(&p->nodeinfo); + if (NULL != p->job) { + PMIX_RELEASE(p->job); + } +} +static PMIX_CLASS_INSTANCE(pmix_apptrkr_t, + pmix_list_item_t, + apcon, apdes); + +static void ndinfocon(pmix_nodeinfo_t *p) +{ + p->nodeid = 0; + p->hostname = NULL; + PMIX_CONSTRUCT(&p->info, pmix_list_t); +} +static void ndinfodes(pmix_nodeinfo_t *p) +{ + if (NULL != p->hostname) { + free(p->hostname); + } + PMIX_LIST_DESTRUCT(&p->info); +} +static PMIX_CLASS_INSTANCE(pmix_nodeinfo_t, + pmix_list_item_t, + ndinfocon, ndinfodes); + +/**********************************************/ + +/* process a node array - contains an array of + * node-level info for a single node. 
Either the + * nodeid, hostname, or both must be included + * in the array to identify the node */ +static pmix_status_t process_node_array(pmix_info_t *info, + pmix_list_t *tgt) +{ + size_t size, j; + pmix_info_t *iptr; + pmix_status_t rc = PMIX_SUCCESS; + pmix_kval_t *kp2, *k1, *knext; + pmix_list_t cache; + pmix_nodeinfo_t *nd = NULL, *ndptr; + bool update; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "PROCESSING NODE ARRAY"); + + /* array of node-level info for a specific node */ + if (PMIX_DATA_ARRAY != info->value.type) { + PMIX_ERROR_LOG(PMIX_ERR_TYPE_MISMATCH); + return PMIX_ERR_TYPE_MISMATCH; + } + + /* setup arrays */ + size = info->value.data.darray->size; + iptr = (pmix_info_t*)info->value.data.darray->array; + PMIX_CONSTRUCT(&cache, pmix_list_t); + + /* cache the values while searching for the nodeid + * and/or hostname */ + for (j=0; j < size; j++) { + if (PMIX_CHECK_KEY(&iptr[j], PMIX_NODEID)) { + if (NULL == nd) { + nd = PMIX_NEW(pmix_nodeinfo_t); + } + PMIX_VALUE_GET_NUMBER(rc, &iptr[j].value, nd->nodeid, uint32_t); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(nd); + PMIX_LIST_DESTRUCT(&cache); + return rc; + } + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_HOSTNAME)) { + if (NULL == nd) { + nd = PMIX_NEW(pmix_nodeinfo_t); + } + nd->hostname = strdup(iptr[j].value.data.string); + } else { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(iptr[j].key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, &iptr[j].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + if (NULL != nd) { + PMIX_RELEASE(nd); + } + PMIX_LIST_DESTRUCT(&cache); + return rc; + } + pmix_list_append(&cache, &kp2->super); + } + } + + if (NULL == nd) { + /* they forgot to pass us the ident for the node */ + PMIX_LIST_DESTRUCT(&cache); + return PMIX_ERR_BAD_PARAM; + } + + /* see if we already have this node on the + * provided list */ + update = false; + 
PMIX_LIST_FOREACH(ndptr, tgt, pmix_nodeinfo_t) { + if (ndptr->nodeid == nd->nodeid || + (NULL != ndptr->hostname && NULL != nd->hostname && 0 == strcmp(ndptr->hostname, nd->hostname))) { + /* we assume that the data is updating the current + * values */ + if (NULL == ndptr->hostname && NULL != nd->hostname) { + ndptr->hostname = strdup(nd->hostname); + } + PMIX_RELEASE(nd); + nd = ndptr; + update = true; + break; + } + } + + /* transfer the cached items to the nodeinfo list */ + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + while (NULL != kp2) { + /* if this is an update, we have to ensure each data + * item only appears once on the list */ + if (update) { + PMIX_LIST_FOREACH_SAFE(k1, knext, &nd->info, pmix_kval_t) { + if (PMIX_CHECK_KEY(k1, kp2->key)) { + pmix_list_remove_item(&nd->info, &k1->super); + PMIX_RELEASE(k1); + break; + } + } + } + pmix_list_append(&nd->info, &kp2->super); + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + } + PMIX_LIST_DESTRUCT(&cache); + + pmix_list_append(tgt, &nd->super); + return PMIX_SUCCESS; +} + +/* process an app array - contains an array of + * app-level info for a single app. If the + * appnum is not included in the array, then + * it is assumed that only app is in the job. 
+ * This assumption is checked and generates + * an error if violated */ +static pmix_status_t process_app_array(pmix_info_t *info, + pmix_job_t *trk) +{ + pmix_list_t cache, ncache; + size_t size, j; + pmix_info_t *iptr; + pmix_status_t rc = PMIX_SUCCESS; + uint32_t appnum; + pmix_apptrkr_t *app = NULL, *apptr; + pmix_kval_t *kp2, *k1, *knext; + pmix_nodeinfo_t *nd; + bool update; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "PROCESSING APP ARRAY"); + + /* apps have to belong to a job */ + if (NULL == trk) { + return PMIX_ERR_BAD_PARAM; + } + + /* array of app-level info */ + if (PMIX_DATA_ARRAY != info->value.type) { + PMIX_ERROR_LOG(PMIX_ERR_TYPE_MISMATCH); + return PMIX_ERR_TYPE_MISMATCH; + } + + /* setup arrays and lists */ + PMIX_CONSTRUCT(&cache, pmix_list_t); + PMIX_CONSTRUCT(&ncache, pmix_list_t); + size = info->value.data.darray->size; + iptr = (pmix_info_t*)info->value.data.darray->array; + + for (j=0; j < size; j++) { + if (PMIX_CHECK_KEY(&iptr[j], PMIX_APPNUM)) { + PMIX_VALUE_GET_NUMBER(rc, &iptr[j].value, appnum, uint32_t); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto release; + } + if (NULL != app) { + /* this is an error - there can be only one app + * described in this array */ + PMIX_RELEASE(app); + PMIX_LIST_DESTRUCT(&cache); + PMIX_LIST_DESTRUCT(&ncache); + return PMIX_ERR_BAD_PARAM; + } + app = PMIX_NEW(pmix_apptrkr_t); + app->appnum = appnum; + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_NODE_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_node_array(&iptr[j], &ncache))) { + PMIX_ERROR_LOG(rc); + goto release; + } + } else { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(iptr[j].key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, &iptr[j].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + goto release; + } + pmix_list_append(&cache, &kp2->super); + } + } + if (NULL == app) { + /* per the standard, they don't have to provide 
us with + * an appnum so long as only one app is in the job */ + if (0 == pmix_list_get_size(&trk->apps)) { + app = PMIX_NEW(pmix_apptrkr_t); + } else { + /* this is not allowed to happen - they are required + * to provide us with an app number per the standard */ + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + goto release; + } + } + /* see if we already have this app on the + * provided list */ + update = false; + PMIX_LIST_FOREACH(apptr, &trk->apps, pmix_apptrkr_t) { + if (apptr->appnum == app->appnum) { + /* we assume that the data is updating the current + * values */ + PMIX_RELEASE(app); + app = apptr; + update = true; + break; + } + } + + /* point the app at its job */ + if (NULL == app->job) { + PMIX_RETAIN(trk); + app->job = trk; + } + + /* transfer the app-level data across */ + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + while (NULL != kp2) { + /* if this is an update, we have to ensure each data + * item only appears once on the list */ + if (update) { + PMIX_LIST_FOREACH_SAFE(k1, knext, &app->appinfo, pmix_kval_t) { + if (PMIX_CHECK_KEY(k1, kp2->key)) { + pmix_list_remove_item(&app->appinfo, &k1->super); + PMIX_RELEASE(k1); + break; + } + } + } + pmix_list_append(&app->appinfo, &kp2->super); + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + } + /* transfer the associated node-level data across */ + nd = (pmix_nodeinfo_t*)pmix_list_remove_first(&ncache); + while (NULL != nd) { + pmix_list_append(&app->nodeinfo, &nd->super); + nd = (pmix_nodeinfo_t*)pmix_list_remove_first(&ncache); + } + + release: + PMIX_LIST_DESTRUCT(&cache); + PMIX_LIST_DESTRUCT(&ncache); + + return rc; +} + +/* process a job array */ +static pmix_status_t process_job_array(pmix_info_t *info, + pmix_job_t *trk, + uint32_t *flags, + char ***procs, + char ***nodes) +{ + pmix_list_t cache; + size_t j, size; + pmix_info_t *iptr; + pmix_kval_t *kp2; + pmix_status_t rc; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "PROCESSING JOB ARRAY"); + + /* 
array of job-level info */ + if (PMIX_DATA_ARRAY != info->value.type) { + PMIX_ERROR_LOG(PMIX_ERR_TYPE_MISMATCH); + return PMIX_ERR_TYPE_MISMATCH; + } + size = info->value.data.darray->size; + iptr = (pmix_info_t*)info->value.data.darray->array; + PMIX_CONSTRUCT(&cache, pmix_list_t); + for (j=0; j < size; j++) { + if (PMIX_CHECK_KEY(&iptr[j], PMIX_APP_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_app_array(&iptr[j], trk))) { + return rc; + } + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_NODE_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_node_array(&iptr[j], &trk->nodeinfo))) { + PMIX_ERROR_LOG(rc); + return rc; + } + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_PROC_MAP)) { + /* not allowed to get this more than once */ + if (*flags & PMIX_HASH_PROC_MAP) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } + /* parse the regex to get the argv array containing proc ranks on each node */ + if (PMIX_SUCCESS != (rc = pmix_preg.parse_procs(iptr[j].value.data.string, procs))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* mark that we got the map */ + *flags |= PMIX_HASH_PROC_MAP; + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_NODE_MAP)) { + /* not allowed to get this more than once */ + if (*flags & PMIX_HASH_NODE_MAP) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } + /* store the node map itself since that is + * what v3 uses */ + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(PMIX_NODE_MAP); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + kp2->value->type = PMIX_STRING; + kp2->value->data.string = strdup(iptr[j].value.data.string); + if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->internal, PMIX_RANK_WILDCARD, kp2))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + PMIX_RELEASE(kp2); // maintain acctg + + /* parse the regex to get the argv array of node names */ + if (PMIX_SUCCESS != (rc = pmix_preg.parse_nodes(iptr[j].value.data.string, nodes))) { + PMIX_ERROR_LOG(rc); + return rc; + } + /* 
mark that we got the map */ + *flags |= PMIX_HASH_NODE_MAP; + } else { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(iptr[j].key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, &iptr[j].value); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(kp2); + PMIX_LIST_DESTRUCT(&cache); + return rc; + } + pmix_list_append(&trk->jobinfo, &kp2->super); + } + } + return PMIX_SUCCESS; +} + +static pmix_list_t mysessions, myjobs; static pmix_status_t hash_init(pmix_info_t info[], size_t ninfo) { pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "gds: hash init"); - PMIX_CONSTRUCT(&myhashes, pmix_list_t); + PMIX_CONSTRUCT(&mysessions, pmix_list_t); + PMIX_CONSTRUCT(&myjobs, pmix_list_t); return PMIX_SUCCESS; } @@ -172,7 +617,8 @@ static void hash_finalize(void) pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "gds: hash finalize"); - PMIX_LIST_DESTRUCT(&myhashes); + PMIX_LIST_DESTRUCT(&mysessions); + PMIX_LIST_DESTRUCT(&myjobs); } static pmix_status_t hash_assign_module(pmix_info_t *info, size_t ninfo, @@ -201,13 +647,6 @@ static pmix_status_t hash_assign_module(pmix_info_t *info, size_t ninfo, return PMIX_SUCCESS; } -/* Define a bitmask to track what information may not have - * been provided but is computable from other info */ -#define PMIX_HASH_PROC_DATA 0x00000001 -#define PMIX_HASH_JOB_SIZE 0x00000002 -#define PMIX_HASH_MAX_PROCS 0x00000004 -#define PMIX_HASH_NUM_NODES 0x00000008 - static pmix_status_t store_map(pmix_hash_table_t *ht, char **nodes, char **ppn, uint32_t flags) @@ -496,16 +935,20 @@ pmix_status_t hash_cache_job_info(struct pmix_namespace_t *ns, pmix_info_t info[], size_t ninfo) { pmix_namespace_t *nptr = (pmix_namespace_t*)ns; - pmix_hash_trkr_t *trk, *t; + pmix_job_t *trk, *t; + pmix_session_t *s = NULL, *sptr; pmix_hash_table_t *ht; pmix_kval_t *kp2, *kvptr; pmix_info_t *iptr; char **nodes=NULL, **procs=NULL; uint8_t *tmp; + uint32_t sid=UINT32_MAX; pmix_rank_t rank; pmix_status_t 
rc=PMIX_SUCCESS; size_t n, j, size, len; uint32_t flags = 0; + pmix_list_t cache, ncache; + pmix_nodeinfo_t *nd; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%d] gds:hash:cache_job_info for nspace %s", @@ -514,7 +957,7 @@ pmix_status_t hash_cache_job_info(struct pmix_namespace_t *ns, /* find the hash table for this nspace */ trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(nptr->nspace, t->ns)) { trk = t; break; @@ -522,14 +965,14 @@ pmix_status_t hash_cache_job_info(struct pmix_namespace_t *ns, } if (NULL == trk) { /* create a tracker as we will likely need it */ - trk = PMIX_NEW(pmix_hash_trkr_t); + trk = PMIX_NEW(pmix_job_t); if (NULL == trk) { return PMIX_ERR_NOMEM; } PMIX_RETAIN(nptr); trk->nptr = nptr; trk->ns = strdup(nptr->nspace); - pmix_list_append(&myhashes, &trk->super); + pmix_list_append(&myjobs, &trk->super); } /* if there isn't any data, then be content with just @@ -541,7 +984,141 @@ pmix_status_t hash_cache_job_info(struct pmix_namespace_t *ns, /* cache the job info on the internal hash table for this nspace */ ht = &trk->internal; for (n=0; n < ninfo; n++) { - if (0 == strcmp(info[n].key, PMIX_NODE_MAP)) { + if (PMIX_CHECK_KEY(&info[n], PMIX_SESSION_ID)) { + PMIX_VALUE_GET_NUMBER(rc, &info[n].value, sid, uint32_t); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto release; + } + /* see if we have this session */ + s = NULL; + PMIX_LIST_FOREACH(sptr, &mysessions, pmix_session_t) { + if (sptr->session == sid) { + s = sptr; + break; + } + } + if (NULL == s) { + s = PMIX_NEW(pmix_session_t); + s->session = sid; + pmix_list_append(&mysessions, &s->super); + } + /* point the job at it */ + if (NULL == trk->session) { + PMIX_RETAIN(s); + trk->session = s; + } + } else if (PMIX_CHECK_KEY(&info[n], PMIX_SESSION_INFO_ARRAY)) { + /* array of session-level info */ + if (PMIX_DATA_ARRAY != info[n].value.type) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + 
rc = PMIX_ERR_TYPE_MISMATCH; + goto release; + } + size = info[n].value.data.darray->size; + iptr = (pmix_info_t*)info[n].value.data.darray->array; + PMIX_CONSTRUCT(&cache, pmix_list_t); + PMIX_CONSTRUCT(&ncache, pmix_list_t); + for (j=0; j < size; j++) { + if (PMIX_CHECK_KEY(&iptr[j], PMIX_SESSION_ID)) { + PMIX_VALUE_GET_NUMBER(rc, &iptr[j].value, sid, uint32_t); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_LIST_DESTRUCT(&cache); + return rc; + } + /* setup a session object */ + if (NULL != s) { + /* does this match the one we were previously given? */ + if (sid != s->session) { + /* no - see if we already have this session */ + PMIX_LIST_FOREACH(sptr, &mysessions, pmix_session_t) { + if (sptr->session == sid) { + s = sptr; + break; + } + } + if (sid != s->session) { + /* wasn't found, so create one */ + s = PMIX_NEW(pmix_session_t); + s->session = sid; + pmix_list_append(&mysessions, &s->super); + } + } + } else { + s = PMIX_NEW(pmix_session_t); + s->session = sid; + pmix_list_append(&mysessions, &s->super); + } + } else if (PMIX_CHECK_KEY(&iptr[j], PMIX_NODE_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_node_array(&iptr[j], &ncache))) { + PMIX_ERROR_LOG(rc); + PMIX_LIST_DESTRUCT(&cache); + PMIX_LIST_DESTRUCT(&ncache); + goto release; + } + } else { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(iptr[j].key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, &iptr[j].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + PMIX_LIST_DESTRUCT(&cache); + PMIX_LIST_DESTRUCT(&ncache); + goto release; + } + pmix_list_append(&cache, &kp2->super); + } + } + if (NULL == s) { + /* this is not allowed to happen - they are required + * to provide us with a session ID per the standard */ + PMIX_LIST_DESTRUCT(&cache); + rc = PMIX_ERR_BAD_PARAM; + PMIX_ERROR_LOG(rc); + goto release; + } + /* point the job at it */ + if (NULL == trk->session) { + PMIX_RETAIN(s); + trk->session = s; + } + /* 
transfer the data across */ + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + while (NULL != kp2) { + pmix_list_append(&s->sessioninfo, &kp2->super); + kp2 = (pmix_kval_t*)pmix_list_remove_first(&cache); + } + PMIX_LIST_DESTRUCT(&cache); + nd = (pmix_nodeinfo_t*)pmix_list_remove_first(&ncache); + while (NULL != nd) { + pmix_list_append(&s->nodeinfo, &nd->super); + nd = (pmix_nodeinfo_t*)pmix_list_remove_first(&ncache); + } + PMIX_LIST_DESTRUCT(&ncache); + } else if (PMIX_CHECK_KEY(&info[n], PMIX_JOB_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_job_array(&info[n], trk, &flags, &procs, &nodes))) { + PMIX_ERROR_LOG(rc); + goto release; + } + } else if (PMIX_CHECK_KEY(&info[n], PMIX_APP_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_app_array(&info[n], trk))) { + PMIX_ERROR_LOG(rc); + goto release; + } + } else if (PMIX_CHECK_KEY(&info[n], PMIX_NODE_INFO_ARRAY)) { + if (PMIX_SUCCESS != (rc = process_node_array(&info[n], &trk->nodeinfo))) { + PMIX_ERROR_LOG(rc); + goto release; + } + } else if (PMIX_CHECK_KEY(&info[n], PMIX_NODE_MAP)) { + /* not allowed to get this more than once */ + if (flags & PMIX_HASH_NODE_MAP) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } /* store the node map itself since that is * what v3 uses */ kp2 = PMIX_NEW(pmix_kval_t); @@ -561,12 +1138,21 @@ pmix_status_t hash_cache_job_info(struct pmix_namespace_t *ns, PMIX_ERROR_LOG(rc); goto release; } - } else if (0 == strcmp(info[n].key, PMIX_PROC_MAP)) { + /* mark that we got the map */ + flags |= PMIX_HASH_NODE_MAP; + } else if (PMIX_CHECK_KEY(&info[n], PMIX_PROC_MAP)) { + /* not allowed to get this more than once */ + if (flags & PMIX_HASH_PROC_MAP) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } /* parse the regex to get the argv array containing proc ranks on each node */ if (PMIX_SUCCESS != (rc = pmix_preg.parse_procs(info[n].value.data.string, &procs))) { PMIX_ERROR_LOG(rc); goto release; } + /* mark that we got the map */ + 
flags |= PMIX_HASH_PROC_MAP; } else if (0 == strcmp(info[n].key, PMIX_PROC_DATA)) { flags |= PMIX_HASH_PROC_DATA; /* an array of data pertaining to a specific proc */ @@ -723,18 +1309,18 @@ static pmix_status_t register_info(pmix_peer_t *peer, pmix_namespace_t *ns, pmix_buffer_t *reply) { - pmix_hash_trkr_t *trk, *t; + pmix_job_t *trk, *t; pmix_hash_table_t *ht; pmix_value_t *val, blob; pmix_status_t rc = PMIX_SUCCESS; pmix_info_t *info; size_t ninfo, n; - pmix_kval_t kv; + pmix_kval_t kv, *kvptr; pmix_buffer_t buf; pmix_rank_t rank; trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(ns->nspace, t->ns)) { trk = t; break; @@ -773,6 +1359,12 @@ static pmix_status_t register_info(pmix_peer_t *peer, PMIX_VALUE_RELEASE(val); } + /* add all values in the jobinfo list */ + PMIX_LIST_FOREACH(kvptr, &trk->jobinfo, pmix_kval_t) { + PMIX_BFROPS_PACK(rc, peer, reply, kvptr, 1, PMIX_KVAL); + } + + /* get the proc-level data for each proc in the job */ for (rank=0; rank < ns->nprocs; rank++) { val = NULL; rc = pmix_hash_fetch(ht, rank, NULL, &val); @@ -820,7 +1412,7 @@ static pmix_status_t hash_register_job_info(struct pmix_peer_t *pr, pmix_namespace_t *ns = peer->nptr; char *msg; pmix_status_t rc; - pmix_hash_trkr_t *trk, *t2; + pmix_job_t *trk, *t2; if (!PMIX_PROC_IS_SERVER(pmix_globals.mypeer) && !PMIX_PROC_IS_LAUNCHER(pmix_globals.mypeer)) { @@ -857,7 +1449,7 @@ static pmix_status_t hash_register_job_info(struct pmix_peer_t *pr, /* setup a tracker for this nspace as we will likely * need it again */ trk = NULL; - PMIX_LIST_FOREACH(t2, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t2, &myjobs, pmix_job_t) { if (ns == t2->nptr) { trk = t2; if (NULL == trk->ns) { @@ -867,11 +1459,11 @@ static pmix_status_t hash_register_job_info(struct pmix_peer_t *pr, } } if (NULL == trk) { - trk = PMIX_NEW(pmix_hash_trkr_t); + trk = PMIX_NEW(pmix_job_t); trk->ns = strdup(ns->nspace); PMIX_RETAIN(ns); trk->nptr = 
ns; - pmix_list_append(&myhashes, &trk->super); + pmix_list_append(&myjobs, &trk->super); } /* the job info for the specified nspace has @@ -914,10 +1506,11 @@ static pmix_status_t hash_store_job_info(const char *nspace, pmix_byte_object_t *bo; pmix_buffer_t buf2; int rank; - pmix_hash_trkr_t *htptr; + pmix_job_t *htptr; pmix_hash_table_t *ht; char **nodelist = NULL; pmix_info_t *info, *iptr; + pmix_namespace_t *ns, *nptr; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%u] pmix:gds:hash store job info for nspace %s", @@ -937,9 +1530,27 @@ static pmix_status_t hash_store_job_info(const char *nspace, return rc; } + /* see if we already have this nspace */ + nptr = NULL; + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { + if (0 == strcmp(ns->nspace, nspace)) { + nptr = ns; + break; + } + } + if (NULL == nptr) { + nptr = PMIX_NEW(pmix_namespace_t); + if (NULL == nptr) { + rc = PMIX_ERR_NOMEM; + return rc; + } + nptr->nspace = strdup(nspace); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); + } + /* see if we already have a hash table for this nspace */ ht = NULL; - PMIX_LIST_FOREACH(htptr, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(htptr, &myjobs, pmix_job_t) { if (0 == strcmp(htptr->ns, nspace)) { ht = &htptr->internal; break; @@ -947,9 +1558,11 @@ static pmix_status_t hash_store_job_info(const char *nspace, } if (NULL == ht) { /* nope - create one */ - htptr = PMIX_NEW(pmix_hash_trkr_t); + htptr = PMIX_NEW(pmix_job_t); htptr->ns = strdup(nspace); - pmix_list_append(&myhashes, &htptr->super); + PMIX_RETAIN(nptr); + htptr->nptr = nptr; + pmix_list_append(&myjobs, &htptr->super); ht = &htptr->internal; } @@ -961,7 +1574,7 @@ static pmix_status_t hash_store_job_info(const char *nspace, pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%u] pmix:gds:hash store job info working key %s", pmix_globals.myid.nspace, pmix_globals.myid.rank, kptr->key); - if (0 == strcmp(kptr->key, PMIX_PROC_BLOB)) { + if 
(PMIX_CHECK_KEY(kptr, PMIX_PROC_BLOB)) { bo = &(kptr->value->data.bo); PMIX_CONSTRUCT(&buf2, pmix_buffer_t); PMIX_LOAD_BUFFER(pmix_client_globals.myserver, &buf2, bo->bytes, bo->size); @@ -1012,7 +1625,7 @@ static pmix_status_t hash_store_job_info(const char *nspace, /* cleanup */ PMIX_DESTRUCT(&buf2); // releases the original kptr data PMIX_RELEASE(kp2); - } else if (0 == strcmp(kptr->key, PMIX_MAP_BLOB)) { + } else if (PMIX_CHECK_KEY(kptr, PMIX_MAP_BLOB)) { /* transfer the byte object for unpacking */ bo = &(kptr->value->data.bo); PMIX_CONSTRUCT(&buf2, pmix_buffer_t); @@ -1183,6 +1796,11 @@ static pmix_status_t hash_store_job_info(const char *nspace, PMIX_RELEASE(kptr); return rc; } + /* if this is the job size, then store it in + * the nptr tracker */ + if (0 == nptr->nprocs && PMIX_CHECK_KEY(kptr, PMIX_JOB_SIZE)) { + nptr->nprocs = kptr->value->data.uint32; + } } PMIX_RELEASE(kptr); kptr = PMIX_NEW(pmix_kval_t); @@ -1205,14 +1823,15 @@ static pmix_status_t hash_store(const pmix_proc_t *proc, pmix_scope_t scope, pmix_kval_t *kv) { - pmix_hash_trkr_t *trk, *t; + pmix_job_t *trk, *t; pmix_status_t rc; pmix_kval_t *kp; + pmix_namespace_t *ns, *nptr; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, - "[%s:%d] gds:hash:hash_store for proc [%s:%d] key %s type %s scope %s", - pmix_globals.myid.nspace, pmix_globals.myid.rank, - proc->nspace, proc->rank, kv->key, + "%s gds:hash:hash_store for proc %s key %s type %s scope %s", + PMIX_NAME_PRINT(&pmix_globals.myid), + PMIX_NAME_PRINT(proc), kv->key, PMIx_Data_type_string(kv->value->type), PMIx_Scope_string(scope)); if (NULL == kv->key) { @@ -1221,7 +1840,7 @@ static pmix_status_t hash_store(const pmix_proc_t *proc, /* find the hash table for this nspace */ trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(proc->nspace, t->ns)) { trk = t; break; @@ -1229,9 +1848,29 @@ static pmix_status_t hash_store(const pmix_proc_t *proc, } if 
(NULL == trk) { /* create one */ - trk = PMIX_NEW(pmix_hash_trkr_t); + trk = PMIX_NEW(pmix_job_t); trk->ns = strdup(proc->nspace); - pmix_list_append(&myhashes, &trk->super); + /* see if we already have this nspace */ + nptr = NULL; + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { + if (0 == strcmp(ns->nspace, proc->nspace)) { + nptr = ns; + break; + } + } + if (NULL == nptr) { + nptr = PMIX_NEW(pmix_namespace_t); + if (NULL == nptr) { + rc = PMIX_ERR_NOMEM; + PMIX_RELEASE(trk); + return rc; + } + nptr->nspace = strdup(proc->nspace); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); + } + PMIX_RETAIN(nptr); + trk->nptr = nptr; + pmix_list_append(&myjobs, &trk->super); } /* see if the proc is me */ @@ -1264,6 +1903,11 @@ static pmix_status_t hash_store(const pmix_proc_t *proc, } } + /* if the number of procs for the nspace object is new, then update it */ + if (0 == trk->nptr->nprocs && PMIX_CHECK_KEY(kv, PMIX_JOB_SIZE)) { + trk->nptr->nprocs = kv->value->data.uint32; + } + /* store it in the corresponding hash table */ if (PMIX_INTERNAL == scope) { if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->internal, proc->rank, kv))) { @@ -1332,12 +1976,13 @@ static pmix_status_t _hash_store_modex(void * cbdata, pmix_byte_object_t *bo) { pmix_namespace_t *ns = (pmix_namespace_t*)nspace; - pmix_hash_trkr_t *trk, *t; + pmix_job_t *trk, *t; pmix_status_t rc = PMIX_SUCCESS; int32_t cnt; pmix_buffer_t pbkt; pmix_proc_t proc; pmix_kval_t *kv; + pmix_namespace_t *ns2, *nptr; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, "[%s:%d] gds:hash:store_modex for nspace %s", @@ -1346,7 +1991,7 @@ static pmix_status_t _hash_store_modex(void * cbdata, /* find the hash table for this nspace */ trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(ns->nspace, t->ns)) { trk = t; break; @@ -1354,9 +1999,29 @@ static pmix_status_t _hash_store_modex(void * cbdata, } if (NULL == trk) { 
/* create one */ - trk = PMIX_NEW(pmix_hash_trkr_t); + trk = PMIX_NEW(pmix_job_t); trk->ns = strdup(ns->nspace); - pmix_list_append(&myhashes, &trk->super); + /* see if we already have this nspace */ + nptr = NULL; + PMIX_LIST_FOREACH(ns2, &pmix_globals.nspaces, pmix_namespace_t) { + if (0 == strcmp(ns->nspace, ns2->nspace)) { + nptr = ns2; + break; + } + } + if (NULL == nptr) { + nptr = PMIX_NEW(pmix_namespace_t); + if (NULL == nptr) { + rc = PMIX_ERR_NOMEM; + PMIX_RELEASE(trk); + return rc; + } + nptr->nspace = strdup(ns->nspace); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); + } + PMIX_RETAIN(nptr); + trk->nptr = nptr; + pmix_list_append(&myjobs, &trk->super); } /* this is data returned via the PMIx_Fence call when @@ -1386,14 +2051,20 @@ static pmix_status_t _hash_store_modex(void * cbdata, kv = PMIX_NEW(pmix_kval_t); PMIX_BFROPS_UNPACK(rc, pmix_globals.mypeer, &pbkt, kv, &cnt, PMIX_KVAL); while (PMIX_SUCCESS == rc) { - /* store this in the hash table */ - if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->remote, proc.rank, kv))) { - PMIX_ERROR_LOG(rc); - bo->bytes = pbkt.base_ptr; - bo->size = pbkt.bytes_used; // restore the incoming data - pbkt.base_ptr = NULL; - PMIX_DESTRUCT(&pbkt); - return rc; + if (PMIX_RANK_UNDEF == proc.rank) { + /* if the rank is undefined, then we store it on the + * remote table of rank=0 as we know that rank must + * always exist */ + if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->remote, 0, kv))) { + PMIX_ERROR_LOG(rc); + return rc; + } + } else { + /* store this in the hash table */ + if (PMIX_SUCCESS != (rc = pmix_hash_store(&trk->remote, proc.rank, kv))) { + PMIX_ERROR_LOG(rc); + return rc; + } } PMIX_RELEASE(kv); // maintain accounting as the hash increments the ref count /* continue along */ @@ -1415,25 +2086,257 @@ static pmix_status_t _hash_store_modex(void * cbdata, } +static pmix_status_t dohash(pmix_hash_table_t *ht, + const char *key, + pmix_rank_t rank, + bool skip_genvals, + pmix_list_t *kvs) +{ + 
pmix_status_t rc; + pmix_value_t *val; + pmix_kval_t *kv, *k2; + pmix_info_t *info; + size_t n, ninfo; + bool found; + + rc = pmix_hash_fetch(ht, rank, key, &val); + if (PMIX_SUCCESS == rc) { + /* if the key was NULL, then all found keys will be + * returned as a pmix_data_array_t in the value */ + if (NULL == key) { + if (NULL == val->data.darray || + PMIX_INFO != val->data.darray->type || + 0 == val->data.darray->size) { + PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND); + PMIX_RELEASE(val); + return PMIX_ERR_NOT_FOUND; + } + info = (pmix_info_t*)val->data.darray->array; + ninfo = val->data.darray->size; + for (n=0; n < ninfo; n++) { + /* if the rank is UNDEF, then we don't want + * anything that starts with "pmix" */ + if (skip_genvals && + 0 == strncmp(info[n].key, "pmix", 4)) { + continue; + } + /* see if we already have this on the list */ + found = false; + PMIX_LIST_FOREACH(k2, kvs, pmix_kval_t) { + if (PMIX_CHECK_KEY(&info[n], k2->key)) { + found = true; + break; + } + } + if (found) { + continue; + } + kv = PMIX_NEW(pmix_kval_t); + if (NULL == kv) { + PMIX_VALUE_RELEASE(val); + return PMIX_ERR_NOMEM; + } + kv->key = strdup(info[n].key); + kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + if (NULL == kv->value) { + PMIX_VALUE_RELEASE(val); + PMIX_RELEASE(kv); + return PMIX_ERR_NOMEM; + } + PMIX_BFROPS_VALUE_XFER(rc, pmix_globals.mypeer, + kv->value, &info[n].value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_VALUE_RELEASE(val); + PMIX_RELEASE(kv); + return rc; + } + pmix_list_append(kvs, &kv->super); + } + PMIX_VALUE_RELEASE(val); + } else { + kv = PMIX_NEW(pmix_kval_t); + if (NULL == kv) { + PMIX_VALUE_RELEASE(val); + return PMIX_ERR_NOMEM; + } + kv->key = strdup(key); + kv->value = val; + pmix_list_append(kvs, &kv->super); + } + } + return rc; +} + +static pmix_status_t fetch_nodeinfo(const char *key, pmix_list_t *tgt, + pmix_info_t *info, size_t ninfo, + pmix_list_t *kvs) +{ + size_t n; + pmix_status_t rc; + uint32_t nid=0; + char *hostname = 
NULL; + bool found = false; + pmix_nodeinfo_t *nd, *ndptr; + pmix_kval_t *kv, *kp2; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "FETCHING NODE INFO"); + + /* scan for the nodeID or hostname to identify + * which node they are asking about */ + for (n=0; n < ninfo; n++) { + if (PMIX_CHECK_KEY(&info[n], PMIX_NODEID)) { + PMIX_VALUE_GET_NUMBER(rc, &info[n].value, nid, uint32_t); + if (PMIX_SUCCESS != rc) { + return rc; + } + found = true; + break; + } else if (PMIX_CHECK_KEY(&info[n], PMIX_HOSTNAME)) { + hostname = info[n].value.data.string; + found = true; + break; + } + } + if (!found) { + return PMIX_ERR_DATA_VALUE_NOT_FOUND; + } + + /* scan the list of nodes to find the matching entry */ + nd = NULL; + PMIX_LIST_FOREACH(ndptr, tgt, pmix_nodeinfo_t) { + if (NULL != hostname && 0 == strcmp(ndptr->hostname, hostname)) { + nd = ndptr; + break; + } + if (NULL == hostname && nid == ndptr->nodeid) { + nd = ndptr; + break; + } + } + if (NULL == nd) { + return PMIX_ERR_NOT_FOUND; + } + /* scan the info list of this node to generate the results */ + rc = PMIX_ERR_NOT_FOUND; + PMIX_LIST_FOREACH(kv, &nd->info, pmix_kval_t) { + if (NULL == key || PMIX_CHECK_KEY(kv, key)) { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(kv->key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, kv->value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + pmix_list_append(kvs, &kp2->super); + rc = PMIX_SUCCESS; + if (NULL != key) { + break; + } + } + } + + return rc; +} + +static pmix_status_t fetch_appinfo(const char *key, pmix_list_t *tgt, + pmix_info_t *info, size_t ninfo, + pmix_list_t *kvs) +{ + size_t n; + pmix_status_t rc; + uint32_t appnum; + bool found = false; + pmix_apptrkr_t *app, *apptr; + pmix_kval_t *kv, *kp2; + + pmix_output_verbose(2, pmix_gds_base_framework.framework_output, + "FETCHING APP INFO"); + + /* scan for the appnum to identify + * which app they are 
asking about */ + for (n=0; n < ninfo; n++) { + if (PMIX_CHECK_KEY(&info[n], PMIX_APPNUM)) { + PMIX_VALUE_GET_NUMBER(rc, &info[n].value, appnum, uint32_t); + if (PMIX_SUCCESS != rc) { + return rc; + } + found = true; + break; + } + } + if (!found) { + return PMIX_ERR_DATA_VALUE_NOT_FOUND; + } + + /* scan the list of apps to find the matching entry */ + app = NULL; + PMIX_LIST_FOREACH(apptr, tgt, pmix_apptrkr_t) { + if (appnum == apptr->appnum) { + app = apptr; + break; + } + } + if (NULL == app) { + return PMIX_ERR_NOT_FOUND; + } + + /* see if they wanted to know something about a node that + * is associated with this app */ + rc = fetch_nodeinfo(key, &app->nodeinfo, info, ninfo, kvs); + if (PMIX_ERR_DATA_VALUE_NOT_FOUND != rc) { + return rc; + } + + /* scan the info list of this app to generate the results */ + rc = PMIX_ERR_NOT_FOUND; + PMIX_LIST_FOREACH(kv, &app->appinfo, pmix_kval_t) { + if (NULL == key || PMIX_CHECK_KEY(kv, key)) { + kp2 = PMIX_NEW(pmix_kval_t); + kp2->key = strdup(kv->key); + kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kp2->value, kv->value); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(kp2); + return rc; + } + pmix_list_append(kvs, &kp2->super); + rc = PMIX_SUCCESS; + if (NULL != key) { + break; + } + } + } + + return rc; +} + static pmix_status_t hash_fetch(const pmix_proc_t *proc, pmix_scope_t scope, bool copy, const char *key, pmix_info_t qualifiers[], size_t nqual, pmix_list_t *kvs) { - pmix_hash_trkr_t *trk, *t; + pmix_job_t *trk, *t; pmix_status_t rc; pmix_value_t *val; - pmix_kval_t *kv; + pmix_kval_t *kv, *kvptr; pmix_info_t *info; size_t n, ninfo; pmix_hash_table_t *ht; + pmix_session_t *sptr; + uint32_t sid; + pmix_rank_t rnk; pmix_output_verbose(2, pmix_gds_base_framework.framework_output, - "[%s:%u] pmix:gds:hash fetch %s for proc %s:%u on scope %s", - pmix_globals.myid.nspace, pmix_globals.myid.rank, + "%s pmix:gds:hash fetch %s for proc %s on scope %s", + 
PMIX_NAME_PRINT(&pmix_globals.myid), (NULL == key) ? "NULL" : key, - proc->nspace, proc->rank, PMIx_Scope_string(scope)); + PMIX_NAME_PRINT(proc), PMIx_Scope_string(scope)); /* if the rank is wildcard and the key is NULL, then * they are asking for a complete copy of the job-level @@ -1442,7 +2345,7 @@ static pmix_status_t hash_fetch(const pmix_proc_t *proc, /* see if we have a tracker for this nspace - we will * if we already cached the job info for it */ trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(proc->nspace, t->ns)) { trk = t; break; @@ -1497,9 +2400,58 @@ static pmix_status_t hash_fetch(const pmix_proc_t *proc, return PMIX_SUCCESS; } + /* if the nspace and rank are undefined, then they are asking + * for session-level information. */ + if (0 == strlen(proc->nspace) && PMIX_RANK_UNDEF == proc->rank) { + /* they must have included something identifying the info + * class they are querying */ + for (n=0; n < nqual; n++) { + if (PMIX_CHECK_KEY(&qualifiers[n], PMIX_SESSION_ID)) { + /* they want session-level info - see if we have + * that session */ + PMIX_VALUE_GET_NUMBER(rc, &qualifiers[n].value, sid, uint32_t); + if (PMIX_SUCCESS != rc) { + /* didn't provide a correct value */ + PMIX_ERROR_LOG(rc); + return rc; + } + PMIX_LIST_FOREACH(sptr, &mysessions, pmix_session_t) { + if (sptr->session == sid) { + /* see if they want info for a specific node */ + rc = fetch_nodeinfo(key, &sptr->nodeinfo, qualifiers, nqual, kvs); + /* if they did, then we are done */ + if (PMIX_ERR_DATA_VALUE_NOT_FOUND != rc) { + return rc; + } + /* check the session info */ + PMIX_LIST_FOREACH(kvptr, &sptr->sessioninfo, pmix_kval_t) { + if (NULL == key || PMIX_CHECK_KEY(kvptr, key)) { + kv = PMIX_NEW(pmix_kval_t); + kv->key = strdup(kvptr->key); + kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); + PMIX_VALUE_XFER(rc, kv->value, kvptr->value); + if (PMIX_SUCCESS != rc) { + PMIX_RELEASE(kv); + return 
rc; + } + pmix_list_append(kvs, &kv->super); + if (NULL != key) { + /* we are done */ + return PMIX_SUCCESS; + } + } + } + } + } + /* if we get here, then the session wasn't found */ + return PMIX_ERR_NOT_FOUND; + } + } + } + /* find the hash table for this nspace */ trk = NULL; - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(proc->nspace, t->ns)) { trk = t; break; @@ -1509,6 +2461,24 @@ static pmix_status_t hash_fetch(const pmix_proc_t *proc, return PMIX_ERR_INVALID_NAMESPACE; } + /* if the rank isn't specified, check to see if they + * are looking for app-level or node-level info for + * this job */ + if (PMIX_RANK_UNDEF == proc->rank) { + /* see if they want info for a specific node */ + rc = fetch_nodeinfo(key, &trk->nodeinfo, qualifiers, nqual, kvs); + /* if they did, then we are done */ + if (PMIX_ERR_DATA_VALUE_NOT_FOUND != rc) { + return rc; + } + /* see if they want info for a specific app */ + rc = fetch_appinfo(key, &trk->apps, qualifiers, nqual, kvs); + /* if they did, then we are done */ + if (PMIX_ERR_DATA_VALUE_NOT_FOUND != rc) { + return rc; + } + } + /* fetch from the corresponding hash table - note that * we always provide a copy as we don't support * shared memory */ @@ -1528,59 +2498,56 @@ static pmix_status_t hash_fetch(const pmix_proc_t *proc, } doover: - rc = pmix_hash_fetch(ht, proc->rank, key, &val); - if (PMIX_SUCCESS == rc) { - /* if the key was NULL, then all found keys will be - * returned as a pmix_data_array_t in the value */ - if (NULL == key) { - if (NULL == val->data.darray || - PMIX_INFO != val->data.darray->type || - 0 == val->data.darray->size) { - PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND); - return PMIX_ERR_NOT_FOUND; + /* if rank=PMIX_RANK_UNDEF, then we need to search all + * known ranks for this nspace as any one of them could + * be the source */ + if (PMIX_RANK_UNDEF == proc->rank) { + for (rnk=0; rnk < trk->nptr->nprocs; rnk++) { + rc = dohash(ht, key, rnk, 
true, kvs); + if (PMIX_ERR_NOMEM == rc) { + return rc; } - info = (pmix_info_t*)val->data.darray->array; - ninfo = val->data.darray->size; - for (n=0; n < ninfo; n++) { + if (PMIX_SUCCESS == rc && NULL != key) { + return rc; + } + } + /* also need to check any job-level info */ + PMIX_LIST_FOREACH(kvptr, &trk->jobinfo, pmix_kval_t) { + if (NULL == key || PMIX_CHECK_KEY(kvptr, key)) { kv = PMIX_NEW(pmix_kval_t); - if (NULL == kv) { - PMIX_VALUE_RELEASE(val); - return PMIX_ERR_NOMEM; - } - kv->key = strdup(info[n].key); + kv->key = strdup(kvptr->key); kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t)); - if (NULL == kv->value) { - PMIX_VALUE_RELEASE(val); - PMIX_RELEASE(kv); - return PMIX_ERR_NOMEM; - } - PMIX_BFROPS_VALUE_XFER(rc, pmix_globals.mypeer, - kv->value, &info[n].value); + PMIX_VALUE_XFER(rc, kv->value, kvptr->value); if (PMIX_SUCCESS != rc) { - PMIX_ERROR_LOG(rc); - PMIX_VALUE_RELEASE(val); PMIX_RELEASE(kv); return rc; } pmix_list_append(kvs, &kv->super); + if (NULL != key) { + break; + } } - PMIX_VALUE_RELEASE(val); - if (PMIX_GLOBAL == scope && ht == &trk->local) { + } + if (NULL == key) { + /* and need to add all job info just in case that was + * passed via a different GDS component */ + dohash(&trk->internal, NULL, PMIX_RANK_WILDCARD, false, kvs); + } + } else { + rc = dohash(ht, key, proc->rank, false, kvs); + } + if (PMIX_SUCCESS == rc) { + if (PMIX_GLOBAL == scope) { + if (ht == &trk->local) { /* need to do this again for the remote data */ ht = &trk->remote; goto doover; + } else if (ht == &trk->internal) { + /* check local */ + ht = &trk->local; + goto doover; } - return PMIX_SUCCESS; } - /* just return the value */ - kv = PMIX_NEW(pmix_kval_t); - if (NULL == kv) { - PMIX_VALUE_RELEASE(val); - return PMIX_ERR_NOMEM; - } - kv->key = strdup(key); - kv->value = val; - pmix_list_append(kvs, &kv->super); } else { if (PMIX_GLOBAL == scope || PMIX_SCOPE_UNDEF == scope) { @@ -1595,6 +2562,9 @@ static pmix_status_t hash_fetch(const pmix_proc_t 
*proc, } } } + if (0 == pmix_list_get_size(kvs)) { + rc = PMIX_ERR_NOT_FOUND; + } return rc; } @@ -1615,13 +2585,13 @@ static pmix_status_t nspace_add(const char *nspace, static pmix_status_t nspace_del(const char *nspace) { - pmix_hash_trkr_t *t; + pmix_job_t *t; /* find the hash table for this nspace */ - PMIX_LIST_FOREACH(t, &myhashes, pmix_hash_trkr_t) { + PMIX_LIST_FOREACH(t, &myjobs, pmix_job_t) { if (0 == strcmp(nspace, t->ns)) { /* release it */ - pmix_list_remove_item(&myhashes, &t->super); + pmix_list_remove_item(&myjobs, &t->super); PMIX_RELEASE(t); break; } @@ -1685,6 +2655,12 @@ static pmix_status_t accept_kvs_resp(pmix_buffer_t *buf) PMIX_ERROR_LOG(rc); return rc; } + /* if the rank is UNDEF, then we store this on our own + * rank tables */ + if (PMIX_RANK_UNDEF == proct.rank) { + proct.rank = pmix_globals.myid.rank; + } + cnt = 1; kv = PMIX_NEW(pmix_kval_t); PMIX_BFROPS_UNPACK(rc, pmix_client_globals.myserver, @@ -1694,7 +2670,6 @@ static pmix_status_t accept_kvs_resp(pmix_buffer_t *buf) * the kval contains shmem connection info, then the * component will know what to do about it (or else * we selected the wrong component for this peer!) */ - PMIX_GDS_STORE_KV(rc, pmix_globals.mypeer, &proct, PMIX_INTERNAL, kv); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c index 800923c34fc..1d48b462770 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/bsdx_ipv4/pif_bsdx.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -158,7 +158,7 @@ static int if_bsdx_open(void) /* fill values into the pmix_pif_t */ memcpy(&a4, &(sin_addr->sin_addr), sizeof(struct in_addr)); - pmix_strncpy(intf->if_name, cur_ifaddrs->ifa_name, IF_NAMESIZE-1); + pmix_strncpy(intf->if_name, cur_ifaddrs->ifa_name, PMIX_IF_NAMESIZE-1); intf->if_index = pmix_list_get_size(&pmix_if_list) + 1; ((struct sockaddr_in*) &intf->if_addr)->sin_addr = a4; ((struct sockaddr_in*) &intf->if_addr)->sin_family = AF_INET; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c index 5954f1580c5..ff30d73500b 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/bsdx_ipv6/pif_bsdx_ipv6.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -183,7 +183,7 @@ static int if_bsdx_ipv6_open(void) return PMIX_ERR_OUT_OF_RESOURCE; } intf->af_family = AF_INET6; - pmix_strncpy(intf->if_name, cur_ifaddrs->ifa_name, IF_NAMESIZE-1); + pmix_strncpy(intf->if_name, cur_ifaddrs->ifa_name, PMIX_IF_NAMESIZE-1); intf->if_index = pmix_list_get_size(&pmix_if_list) + 1; ((struct sockaddr_in6*) &intf->if_addr)->sin6_addr = a6; ((struct sockaddr_in6*) &intf->if_addr)->sin6_family = AF_INET6; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/linux_ipv6/pif_linux_ipv6.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/linux_ipv6/pif_linux_ipv6.c index f0bb2db9f5e..53bec6fb04a 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/linux_ipv6/pif_linux_ipv6.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/linux_ipv6/pif_linux_ipv6.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2010 Cisco Systems, Inc. 
All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -80,12 +80,17 @@ static int if_linux_ipv6_open(void) { FILE *f; if ((f = fopen("/proc/net/if_inet6", "r"))) { - char ifname[IF_NAMESIZE]; + /* IF_NAMESIZE is normally 16 on Linux, + but the next scanf allows up to 21 bytes */ + char ifname[PMIX_IF_NAMESIZE]; unsigned int idx, pfxlen, scope, dadstat; struct in6_addr a6; int iter; uint32_t flag; - unsigned int addrbyte[16]; + unsigned int addrbyte[PMIX_IF_NAMESIZE]; + + memset(addrbyte, 0, PMIX_IF_NAMESIZE*sizeof(unsigned int)); + memset(ifname, 0, PMIX_IF_NAMESIZE*sizeof(char)); while (fscanf(f, "%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x%2x %x %x %x %x %20s\n", &addrbyte[0], &addrbyte[1], &addrbyte[2], &addrbyte[3], @@ -102,8 +107,8 @@ static int if_linux_ipv6_open(void) addrbyte[8], addrbyte[9], addrbyte[10], addrbyte[11], addrbyte[12], addrbyte[13], addrbyte[14], addrbyte[15], scope); - /* we don't want any other scope less than link-local */ - if (scope < 0x20) { + /* Only interested in global (0x00) scope */ + if (scope != 0x00) { pmix_output_verbose(1, pmix_pif_base_framework.framework_output, "skipping interface %2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x scope %x\n", addrbyte[0], addrbyte[1], addrbyte[2], addrbyte[3], @@ -127,7 +132,7 @@ static int if_linux_ipv6_open(void) } /* now construct the pmix_pif_t */ - pmix_strncpy(intf->if_name, ifname, IF_NAMESIZE-1); + pmix_strncpy(intf->if_name, ifname, PMIX_IF_NAMESIZE-1); intf->if_index = pmix_list_get_size(&pmix_if_list)+1; intf->if_kernel_index = (uint16_t) idx; ((struct sockaddr_in6*) &intf->if_addr)->sin6_addr = a6; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/pif.h b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/pif.h index e43de4707db..9d23fdf1ff5 100644 --- 
a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/pif.h +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/pif.h @@ -3,7 +3,7 @@ * Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -73,7 +73,7 @@ BEGIN_C_DECLS typedef struct pmix_pif_t { pmix_list_item_t super; - char if_name[IF_NAMESIZE+1]; + char if_name[PMIX_IF_NAMESIZE+1]; int if_index; uint16_t if_kernel_index; uint16_t af_family; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/solaris_ipv6/pif_solaris_ipv6.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/solaris_ipv6/pif_solaris_ipv6.c index a7d94f79086..c9895cb6617 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pif/solaris_ipv6/pif_solaris_ipv6.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pif/solaris_ipv6/pif_solaris_ipv6.c @@ -3,7 +3,7 @@ * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -190,7 +190,7 @@ static int if_solaris_ipv6_open(void) } intf->af_family = AF_INET6; - pmix_strncpy (intf->if_name, lifreq->lifr_name, IF_NAMESIZE-1); + pmix_strncpy (intf->if_name, lifreq->lifr_name, PMIX_IF_NAMESIZE-1); intf->if_index = pmix_list_get_size(&pmix_if_list)+1; memcpy(&intf->if_addr, my_addr, sizeof (*my_addr)); intf->if_mask = 64; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/base/plog_base_stubs.c b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/base/plog_base_stubs.c index 226db25b275..221ec775f87 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/base/plog_base_stubs.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/base/plog_base_stubs.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2018 Intel, Inc. All rights reserved. + * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * @@ -109,8 +109,8 @@ pmix_status_t pmix_plog_base_log(const pmix_proc_t *source, * channel that can successfully handle this request, * and any channel directives */ for (n=0; n < ndirs; n++) { - if (0 == strncmp(directives[n].key, PMIX_LOG_ONCE, PMIX_MAX_KEYLEN)) { - logonce = true; + if (PMIX_CHECK_KEY(&directives[n], PMIX_LOG_ONCE)) { + logonce = PMIX_INFO_TRUE(&directives[n]); break; } } @@ -237,14 +237,10 @@ pmix_status_t pmix_plog_base_log(const pmix_proc_t *source, rc = mycount->status; // save the status as it could change when the lock is released if (0 == mycount->nreqs) { - /* execute their callback */ - if (NULL != mycount->cbfunc) { - mycount->cbfunc(mycount->status, mycount->cbdata); - } PMIX_RELEASE_THREAD(&mycount->lock); PMIX_RELEASE(mycount); PMIX_RELEASE_THREAD(&pmix_plog_globals.lock); - return PMIX_SUCCESS; + return PMIX_OPERATION_SUCCEEDED; } PMIX_RELEASE_THREAD(&mycount->lock); PMIX_RELEASE_THREAD(&pmix_plog_globals.lock); diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/stdfd/plog_stdfd.c 
b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/stdfd/plog_stdfd.c index 619dc38f702..2aceac179ad 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/plog/stdfd/plog_stdfd.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/plog/stdfd/plog_stdfd.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -101,6 +101,9 @@ static pmix_status_t mylog(const pmix_proc_t *source, /* check to see if there are any stdfd entries */ rc = PMIX_ERR_TAKE_NEXT_OPTION; for (n=0; n < ndata; n++) { + if (PMIX_INFO_OP_IS_COMPLETE(&data[n])) { + continue; + } if (0 == strncmp(data[n].key, PMIX_LOG_STDERR, PMIX_MAX_KEYLEN)) { bo.bytes = data[n].value.data.string; bo.size = strlen(bo.bytes); @@ -117,6 +120,5 @@ static pmix_status_t mylog(const pmix_proc_t *source, rc = PMIX_SUCCESS; } } - return rc; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_fns.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_fns.c index d62268dbd52..447a8e1ca14 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_fns.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/base/pnet_base_fns.c @@ -60,7 +60,7 @@ pmix_status_t pmix_pnet_base_allocate(char *nspace, nptr = NULL; /* find this nspace - note that it may not have * been registered yet */ - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(ns->nspace, nspace)) { nptr = ns; break; @@ -73,7 +73,7 @@ pmix_status_t pmix_pnet_base_allocate(char *nspace, return PMIX_ERR_NOMEM; } nptr->nspace = strdup(nspace); - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); } if (NULL != info) { @@ 
-146,7 +146,7 @@ pmix_status_t pmix_pnet_base_setup_local_network(char *nspace, /* find this proc's nspace object */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(ns->nspace, nspace)) { nptr = ns; break; @@ -159,7 +159,7 @@ pmix_status_t pmix_pnet_base_setup_local_network(char *nspace, return PMIX_ERR_NOMEM; } nptr->nspace = strdup(nspace); - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); } PMIX_LIST_FOREACH(active, &pmix_pnet_globals.actives, pmix_pnet_base_active_module_t) { @@ -191,7 +191,7 @@ pmix_status_t pmix_pnet_base_setup_fork(const pmix_proc_t *proc, char ***env) /* find this proc's nspace object */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(ns->nspace, proc->nspace)) { nptr = ns; break; @@ -204,7 +204,7 @@ pmix_status_t pmix_pnet_base_setup_fork(const pmix_proc_t *proc, char ***env) return PMIX_ERR_NOMEM; } nptr->nspace = strdup(proc->nspace); - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); } PMIX_LIST_FOREACH(active, &pmix_pnet_globals.actives, pmix_pnet_base_active_module_t) { @@ -282,7 +282,7 @@ void pmix_pnet_base_deregister_nspace(char *nspace) /* find this nspace object */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(ns->nspace, nspace)) { nptr = ns; break; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c index d795c8bc486..712b1644219 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/opa/pnet_opa.c @@ 
-435,7 +435,7 @@ static pmix_status_t collect_inventory(pmix_info_t directives[], size_t ndirs, pmix_buffer_t bucket, pbkt; bool found = false; pmix_byte_object_t pbo; - char nodename[PMIX_MAXHOSTNAMELEN], *foo; + char nodename[PMIX_MAXHOSTNAMELEN] = {0}, *foo; pmix_output_verbose(2, pmix_pnet_base_framework.framework_output, "pnet:opa collect inventory"); @@ -443,7 +443,7 @@ static pmix_status_t collect_inventory(pmix_info_t directives[], size_t ndirs, /* setup the bucket - we will pass the results as a blob */ PMIX_CONSTRUCT(&bucket, pmix_buffer_t); /* pack our node name */ - gethostname(nodename, sizeof(nodename)); + gethostname(nodename, sizeof(nodename)-1); foo = &nodename[0]; PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &bucket, &foo, 1, PMIX_STRING); if (PMIX_SUCCESS != rc) { diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/pnet_tcp.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/pnet_tcp.c index fecec014243..81e823ad245 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/pnet_tcp.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pnet/tcp/pnet_tcp.c @@ -737,6 +737,7 @@ static pmix_status_t setup_local_network(pmix_namespace_t *nptr, "pnet:tcp:setup_local_network"); if (NULL != info) { + idkey = strdup("default"); for (n=0; n < ninfo; n++) { /* look for my key */ if (0 == strncmp(info[n].key, PMIX_TCP_SETUP_APP_KEY, PMIX_MAX_KEYLEN)) { @@ -866,8 +867,8 @@ static pmix_status_t collect_inventory(pmix_info_t directives[], size_t ndirs, pmix_inventory_cbfunc_t cbfunc, void *cbdata) { pmix_inventory_rollup_t *cd = (pmix_inventory_rollup_t*)cbdata; - char *prefix, myhost[PMIX_MAXHOSTNAMELEN]; - char myconnhost[PMIX_MAXHOSTNAMELEN]; + char *prefix, myhost[PMIX_MAXHOSTNAMELEN] = {0}; + char myconnhost[PMIX_MAXHOSTNAMELEN] = {0}; char name[32], uri[2048]; struct sockaddr_storage my_ss; char *foo; @@ -884,7 +885,7 @@ static pmix_status_t collect_inventory(pmix_info_t directives[], size_t ndirs, /* setup the bucket - we will pass the results as a blob */ 
PMIX_CONSTRUCT(&bucket, pmix_buffer_t); /* add our hostname */ - gethostname(myhost, sizeof(myhost)); + gethostname(myhost, sizeof(myhost)-1); foo = &myhost[0]; PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &bucket, &foo, 1, PMIX_STRING); if (PMIX_SUCCESS != rc) { @@ -919,11 +920,11 @@ static pmix_status_t collect_inventory(pmix_info_t directives[], size_t ndirs, if (AF_INET == my_ss.ss_family) { prefix = "tcp4://"; inet_ntop(AF_INET, &((struct sockaddr_in*) &my_ss)->sin_addr, - myconnhost, PMIX_MAXHOSTNAMELEN); + myconnhost, PMIX_MAXHOSTNAMELEN-1); } else if (AF_INET6 == my_ss.ss_family) { prefix = "tcp6://"; inet_ntop(AF_INET6, &((struct sockaddr_in6*) &my_ss)->sin6_addr, - myconnhost, PMIX_MAXHOSTNAMELEN); + myconnhost, PMIX_MAXHOSTNAMELEN-1); } else { continue; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/mmap/pshmem_mmap.c b/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/mmap/pshmem_mmap.c index 09c377cd8bb..6529c1fa4a8 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/mmap/pshmem_mmap.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/pshmem/mmap/pshmem_mmap.c @@ -3,7 +3,7 @@ * All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2017-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2017-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -82,9 +82,9 @@ static int _mmap_segment_create(pmix_pshmem_seg_t *sm_seg, const char *file_name if (ENOSPC == rc) { rc = PMIX_ERR_OUT_OF_RESOURCE; goto out; - } else if ((ENOTSUP != rc) + } else if (EINVAL != rc && ENOTSUP != rc #ifdef EOPNOTSUPP - && (EOPNOTSUPP != rc) + && EOPNOTSUPP != rc #endif ){ rc = PMIX_ERROR; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c index 0252eed51c2..e86a4126405 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp.c @@ -24,6 +24,7 @@ */ #include +#include "src/include/pmix_globals.h" #ifdef HAVE_FCNTL_H #include @@ -50,7 +51,6 @@ #include #endif -#include "src/include/pmix_globals.h" #include "src/include/pmix_socket_errno.h" #include "src/client/pmix_client_ops.h" #include "src/server/pmix_server_ops.h" @@ -131,7 +131,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, char *p, *p2, *server_nspace = NULL, *rendfile = NULL; int sd, rc; size_t n; - char myhost[PMIX_MAXHOSTNAMELEN]; + char myhost[PMIX_MAXHOSTNAMELEN] = {0}; bool system_level = false; bool system_level_only = false; bool reconnect = false; @@ -414,7 +414,7 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer, /* mark that we are using the V2 protocol */ pmix_globals.mypeer->protocol = PMIX_PROTOCOL_V2; - gethostname(myhost, sizeof(myhost)); + gethostname(myhost, sizeof(myhost)-1); /* if we were given a URI via MCA param, then look no further */ if (NULL != suri) { if (NULL != server_nspace) { diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c index 61eb18ec305..cb800a6fdf9 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/tcp/ptl_tcp_component.c @@ -360,8 +360,8 @@ static pmix_status_t 
setup_listener(pmix_info_t info[], size_t ninfo, bool session_tool = false; bool system_tool = false; pmix_socklen_t addrlen; - char *prefix, myhost[PMIX_MAXHOSTNAMELEN]; - char myconnhost[PMIX_MAXHOSTNAMELEN]; + char *prefix, myhost[PMIX_MAXHOSTNAMELEN] = {0}; + char myconnhost[PMIX_MAXHOSTNAMELEN] = {0}; int myport; pmix_kval_t *urikv; @@ -624,17 +624,17 @@ static pmix_status_t setup_listener(pmix_info_t info[], size_t ninfo, goto sockerror; } - gethostname(myhost, sizeof(myhost)); + gethostname(myhost, sizeof(myhost)-1); if (AF_INET == mca_ptl_tcp_component.connection.ss_family) { prefix = "tcp4://"; myport = ntohs(((struct sockaddr_in*) &mca_ptl_tcp_component.connection)->sin_port); inet_ntop(AF_INET, &((struct sockaddr_in*) &mca_ptl_tcp_component.connection)->sin_addr, - myconnhost, PMIX_MAXHOSTNAMELEN); + myconnhost, PMIX_MAXHOSTNAMELEN-1); } else if (AF_INET6 == mca_ptl_tcp_component.connection.ss_family) { prefix = "tcp6://"; myport = ntohs(((struct sockaddr_in6*) &mca_ptl_tcp_component.connection)->sin6_port); inet_ntop(AF_INET6, &((struct sockaddr_in6*) &mca_ptl_tcp_component.connection)->sin6_addr, - myconnhost, PMIX_MAXHOSTNAMELEN); + myconnhost, PMIX_MAXHOSTNAMELEN-1); } else { goto sockerror; } @@ -898,7 +898,7 @@ static char **split_and_resolve(char **orig_str, char *name) { int i, ret, save, if_index; char **argv, *str, *tmp; - char if_name[IF_NAMESIZE]; + char if_name[PMIX_IF_NAMESIZE]; struct sockaddr_storage argv_inaddr, if_inaddr; uint32_t argv_prefix; @@ -1384,7 +1384,7 @@ static void connection_handler(int sd, short args, void *cbdata) * of local clients. 
So let's start by searching for * the nspace object */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(tmp->nspace, nspace)) { nptr = tmp; break; @@ -1534,7 +1534,7 @@ static void connection_handler(int sd, short args, void *cbdata) /* see if we know this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(tmp->nspace, nspace)) { nptr = tmp; break; @@ -1838,7 +1838,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) if (5 != pnd->flag && 8 != pnd->flag) { PMIX_RETAIN(nptr); nptr->nspace = strdup(cd->proc.nspace); - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); info = PMIX_NEW(pmix_rank_info_t); info->pname.nspace = strdup(nptr->nspace); info->pname.rank = cd->proc.rank; @@ -1866,7 +1866,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) peer->nptr->compat.psec = pmix_psec_base_assign_module(pnd->psec); if (NULL == peer->nptr->compat.psec) { PMIX_RELEASE(peer); - pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); PMIX_RELEASE(nptr); // will release the info object CLOSE_THE_SOCKET(pnd->sd); goto done; @@ -1881,7 +1881,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) PMIX_INFO_DESTRUCT(&ginfo); if (NULL == peer->nptr->compat.gds) { PMIX_RELEASE(peer); - pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); PMIX_RELEASE(nptr); // will release the info object CLOSE_THE_SOCKET(pnd->sd); goto done; @@ -1900,7 +1900,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) req = PMIX_NEW(pmix_iof_req_t); if (NULL == req) { PMIX_RELEASE(peer); - 
pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); PMIX_RELEASE(nptr); // will release the info object CLOSE_THE_SOCKET(pnd->sd); goto done; @@ -1936,7 +1936,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) "validation of tool credentials failed: %s", PMIx_Error_string(rc)); PMIX_RELEASE(peer); - pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); PMIX_RELEASE(nptr); // will release the info object CLOSE_THE_SOCKET(pnd->sd); goto done; @@ -1949,7 +1949,7 @@ static void process_cbfunc(int sd, short args, void *cbdata) PMIX_RELEASE(pnd); PMIX_RELEASE(cd); PMIX_RELEASE(peer); - pmix_list_remove_item(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_remove_item(&pmix_globals.nspaces, &nptr->super); PMIX_RELEASE(nptr); // will release the info object /* probably cannot send an error reply if we are out of memory */ return; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock.c index fc7b6da1c47..51417f3e032 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock.c @@ -13,7 +13,7 @@ * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -611,7 +611,7 @@ void pmix_usock_send_handler(int sd, short flags, void *cbdata) return; } else { // report the error - event_del(&peer->send_event); + pmix_event_del(&peer->send_event); peer->send_ev_active = false; PMIX_RELEASE(msg); peer->send_msg = NULL; diff --git a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c index ef33e766f9c..36637cc9882 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c +++ b/opal/mca/pmix/pmix3x/pmix/src/mca/ptl/usock/ptl_usock_component.c @@ -552,7 +552,7 @@ static void connection_handler(int sd, short args, void *cbdata) /* see if we know this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(tmp->nspace, nspace)) { nptr = tmp; break; diff --git a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_finalize.c b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_finalize.c index 87a1456f4d4..c083ad645f3 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_finalize.c +++ b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_finalize.c @@ -12,9 +12,9 @@ * Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2016-2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -116,6 +116,8 @@ void pmix_rte_finalize(void) } PMIX_DESTRUCT(&pmix_globals.notifications); PMIX_LIST_DESTRUCT(&pmix_globals.iof_requests); + free(pmix_globals.hostname); + PMIX_LIST_DESTRUCT(&pmix_globals.nspaces); /* now safe to release the event base */ if (!pmix_globals.external_evbase) { diff --git a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_init.c b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_init.c index d1803de7046..b3255e4e5da 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_init.c +++ b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_init.c @@ -33,7 +33,9 @@ #include #endif #include PMIX_EVENT_HEADER -#include "event2/thread.h" +#if ! PMIX_HAVE_LIBEV +#include PMIX_EVENT2_THREAD_HEADER +#endif #include @@ -98,7 +100,7 @@ int pmix_rte_init(pmix_proc_type_t type, int ret, debug_level; char *error = NULL, *evar; size_t n; - char hostname[PMIX_MAXHOSTNAMELEN]; + char hostname[PMIX_MAXHOSTNAMELEN] = {0}; if( ++pmix_initialized != 1 ) { if( pmix_initialized < 1 ) { @@ -159,7 +161,7 @@ int pmix_rte_init(pmix_proc_type_t type, } /* setup the globals structure */ - gethostname(hostname, PMIX_MAXHOSTNAMELEN); + gethostname(hostname, PMIX_MAXHOSTNAMELEN-1); pmix_globals.hostname = strdup(hostname); memset(&pmix_globals.myid.nspace, 0, PMIX_MAX_NSLEN+1); pmix_globals.myid.rank = PMIX_RANK_INVALID; @@ -172,6 +174,8 @@ int pmix_rte_init(pmix_proc_type_t type, ret = pmix_hotel_init(&pmix_globals.notifications, pmix_globals.max_events, pmix_globals.evbase, pmix_globals.event_eviction_time, _notification_eviction_cbfunc); + PMIX_CONSTRUCT(&pmix_globals.nspaces, pmix_list_t); + if (PMIX_SUCCESS != ret) { error = "notification hotel init"; goto return_error; diff --git a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.c b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.c index a66e4d0a768..7e40422a0bd 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.c +++ 
b/opal/mca/pmix/pmix3x/pmix/src/runtime/pmix_progress_threads.c @@ -1,8 +1,8 @@ /* * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2017-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2019 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ @@ -13,7 +13,6 @@ */ #include -#include "src/include/types.h" #ifdef HAVE_UNISTD_H #include @@ -49,6 +48,12 @@ typedef struct { bool engine_constructed; pmix_thread_t engine; +#if PMIX_HAVE_LIBEV + ev_async async; + pthread_mutex_t mutex; + pthread_cond_t cond; + pmix_list_t list; +#endif } pmix_progress_tracker_t; static void tracker_constructor(pmix_progress_tracker_t *p) @@ -58,6 +63,10 @@ static void tracker_constructor(pmix_progress_tracker_t *p) p->ev_base = NULL; p->ev_active = false; p->engine_constructed = false; +#if PMIX_HAVE_LIBEV + pthread_mutex_init(&p->mutex, NULL); + PMIX_CONSTRUCT(&p->list, pmix_list_t); +#endif } static void tracker_destructor(pmix_progress_tracker_t *p) @@ -73,6 +82,10 @@ static void tracker_destructor(pmix_progress_tracker_t *p) if (p->engine_constructed) { PMIX_DESTRUCT(&p->engine); } +#if PMIX_HAVE_LIBEV + pthread_mutex_destroy(&p->mutex); + PMIX_LIST_DESTRUCT(&p->list); +#endif } static PMIX_CLASS_INSTANCE(pmix_progress_tracker_t, @@ -80,6 +93,114 @@ static PMIX_CLASS_INSTANCE(pmix_progress_tracker_t, tracker_constructor, tracker_destructor); +#if PMIX_HAVE_LIBEV + +typedef enum { + PMIX_EVENT_ACTIVE, + PMIX_EVENT_ADD, + PMIX_EVENT_DEL +} pmix_event_type_t; + +typedef struct { + pmix_list_item_t super; + struct event *ev; + struct timeval *tv; + int res; + short ncalls; + pmix_event_type_t type; +} pmix_event_caddy_t; + +static PMIX_CLASS_INSTANCE(pmix_event_caddy_t, + pmix_list_item_t, + NULL, NULL); + 
+static pmix_progress_tracker_t* pmix_progress_tracker_get_by_base(struct event_base *); + +static void pmix_libev_ev_async_cb (EV_P_ ev_async *w, int revents) +{ + pmix_progress_tracker_t *trk = pmix_progress_tracker_get_by_base((struct event_base *)EV_A); + assert(NULL != trk); + pthread_mutex_lock (&trk->mutex); + pmix_event_caddy_t *cd, *next; + PMIX_LIST_FOREACH_SAFE(cd, next, &trk->list, pmix_event_caddy_t) { + switch (cd->type) { + case PMIX_EVENT_ADD: + (void)event_add(cd->ev, cd->tv); + break; + case PMIX_EVENT_DEL: + (void)event_del(cd->ev); + break; + case PMIX_EVENT_ACTIVE: + (void)event_active(cd->ev, cd->res, cd->ncalls); + break; + } + pmix_list_remove_item(&trk->list, &cd->super); + PMIX_RELEASE(cd); + } + pthread_mutex_unlock (&trk->mutex); +} + +int pmix_event_add(struct event *ev, struct timeval *tv) { + int res; + pmix_progress_tracker_t *trk = pmix_progress_tracker_get_by_base(ev->ev_base); + if ((NULL != trk) && !pthread_equal(pthread_self(), trk->engine.t_handle)) { + pmix_event_caddy_t *cd = PMIX_NEW(pmix_event_caddy_t); + cd->type = PMIX_EVENT_ADD; + cd->ev = ev; + cd->tv = tv; + pthread_mutex_lock(&trk->mutex); + pmix_list_append(&trk->list, &cd->super); + ev_async_send ((struct ev_loop *)trk->ev_base, &trk->async); + pthread_mutex_unlock(&trk->mutex); + res = PMIX_SUCCESS; + } else { + res = event_add(ev, tv); + } + return res; +} + +int pmix_event_del(struct event *ev) { + int res; + pmix_progress_tracker_t *trk = pmix_progress_tracker_get_by_base(ev->ev_base); + if ((NULL != trk) && !pthread_equal(pthread_self(), trk->engine.t_handle)) { + pmix_event_caddy_t *cd = PMIX_NEW(pmix_event_caddy_t); + cd->type = PMIX_EVENT_DEL; + cd->ev = ev; + pthread_mutex_lock(&trk->mutex); + pmix_list_append(&trk->list, &cd->super); + ev_async_send ((struct ev_loop *)trk->ev_base, &trk->async); + pthread_mutex_unlock(&trk->mutex); + res = PMIX_SUCCESS; + } else { + res = event_del(ev); + } + return res; +} + +void pmix_event_active (struct event *ev, int 
res, short ncalls) { + pmix_progress_tracker_t *trk = pmix_progress_tracker_get_by_base(ev->ev_base); + if ((NULL != trk) && !pthread_equal(pthread_self(), trk->engine.t_handle)) { + pmix_event_caddy_t *cd = PMIX_NEW(pmix_event_caddy_t); + cd->type = PMIX_EVENT_ACTIVE; + cd->ev = ev; + cd->res = res; + cd->ncalls = ncalls; + pthread_mutex_lock(&trk->mutex); + pmix_list_append(&trk->list, &cd->super); + ev_async_send ((struct ev_loop *)trk->ev_base, &trk->async); + pthread_mutex_unlock(&trk->mutex); + } else { + event_active(ev, res, ncalls); + } +} + +void pmix_event_base_loopexit (pmix_event_base_t *ev_base) { + pmix_progress_tracker_t *trk = pmix_progress_tracker_get_by_base(ev_base); + assert(NULL != trk); + ev_async_send ((struct ev_loop *)trk->ev_base, &trk->async); +} +#endif + static bool inited = false; static pmix_list_t tracking; static struct timeval long_timeout = { @@ -118,7 +239,6 @@ static void stop_progress_engine(pmix_progress_tracker_t *trk) { assert(trk->ev_active); trk->ev_active = false; - /* break the event loop - this will cause the loop to exit upon completion of any current event */ pmix_event_base_loopexit(trk->ev_base); @@ -192,6 +312,11 @@ pmix_event_base_t *pmix_progress_thread_init(const char *name) dummy_timeout_cb, trk); pmix_event_add(&trk->block, &long_timeout); +#if PMIX_HAVE_LIBEV + ev_async_init (&trk->async, pmix_libev_ev_async_cb); + ev_async_start((struct ev_loop *)trk->ev_base, &trk->async); +#endif + /* construct the thread object */ PMIX_CONSTRUCT(&trk->engine, pmix_thread_t); trk->engine_constructed = true; @@ -302,6 +427,21 @@ int pmix_progress_thread_pause(const char *name) return PMIX_ERR_NOT_FOUND; } +#if PMIX_HAVE_LIBEV +static pmix_progress_tracker_t* pmix_progress_tracker_get_by_base(pmix_event_base_t *base) { + pmix_progress_tracker_t *trk; + + if (inited) { + PMIX_LIST_FOREACH(trk, &tracking, pmix_progress_tracker_t) { + if(trk->ev_base == base) { + return trk; + } + } + } + return NULL; +} +#endif + int 
pmix_progress_thread_resume(const char *name) { pmix_progress_tracker_t *trk; diff --git a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c index f827018d712..2ea33a056c1 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c @@ -50,7 +50,9 @@ #include #include #include PMIX_EVENT_HEADER +#if ! PMIX_HAVE_LIBEV #include PMIX_EVENT2_THREAD_HEADER +#endif #include "src/util/argv.h" #include "src/util/error.h" @@ -97,7 +99,6 @@ pmix_status_t pmix_server_initialize(void) PMIX_CONSTRUCT(&pmix_server_globals.gdata, pmix_list_t); PMIX_CONSTRUCT(&pmix_server_globals.events, pmix_list_t); PMIX_CONSTRUCT(&pmix_server_globals.local_reqs, pmix_list_t); - PMIX_CONSTRUCT(&pmix_server_globals.nspaces, pmix_list_t); PMIX_CONSTRUCT(&pmix_server_globals.iof, pmix_list_t); pmix_output_verbose(2, pmix_server_globals.base_output, @@ -361,7 +362,7 @@ PMIX_EXPORT pmix_status_t PMIx_server_init(pmix_server_module_t *module, pmix_globals.mypeer->nptr = PMIX_NEW(pmix_namespace_t); /* ensure our own nspace is first on the list */ PMIX_RETAIN(pmix_globals.mypeer->nptr); - pmix_list_prepend(&pmix_server_globals.nspaces, &pmix_globals.mypeer->nptr->super); + pmix_list_prepend(&pmix_globals.nspaces, &pmix_globals.mypeer->nptr->super); } pmix_globals.mypeer->nptr->nspace = strdup(pmix_globals.myid.nspace); rinfo->pname.nspace = strdup(pmix_globals.mypeer->nptr->nspace); @@ -474,13 +475,12 @@ PMIX_EXPORT pmix_status_t PMIx_server_finalize(void) PMIX_LIST_DESTRUCT(&pmix_server_globals.local_reqs); PMIX_LIST_DESTRUCT(&pmix_server_globals.gdata); PMIX_LIST_DESTRUCT(&pmix_server_globals.events); - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { /* ensure that we do the specified cleanup - if this is an * abnormal termination, then the nspace object may not be * at zero refcount */ 
pmix_execute_epilog(&ns->epilog); } - PMIX_LIST_DESTRUCT(&pmix_server_globals.nspaces); PMIX_LIST_DESTRUCT(&pmix_server_globals.iof); pmix_hwloc_cleanup(); @@ -547,7 +547,7 @@ static void _register_nspace(int sd, short args, void *cbdata) /* see if we already have this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(tmp->nspace, cd->proc.nspace)) { nptr = tmp; break; @@ -560,7 +560,7 @@ static void _register_nspace(int sd, short args, void *cbdata) goto release; } nptr->nspace = strdup(cd->proc.nspace); - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); } nptr->nlocalprocs = cd->nlocalprocs; @@ -764,12 +764,12 @@ static void _deregister_nspace(int sd, short args, void *cbdata) pmix_server_purge_events(NULL, &cd->proc); /* release this nspace */ - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (PMIX_CHECK_NSPACE(tmp->nspace, cd->proc.nspace)) { /* perform any nspace-level epilog */ pmix_execute_epilog(&tmp->epilog); /* remove and release it */ - pmix_list_remove_item(&pmix_server_globals.nspaces, &tmp->super); + pmix_list_remove_item(&pmix_globals.nspaces, &tmp->super); PMIX_RELEASE(tmp); break; } @@ -997,7 +997,7 @@ static void _register_client(int sd, short args, void *cbdata) /* see if we already have this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(ns->nspace, cd->proc.nspace)) { nptr = ns; break; @@ -1010,7 +1010,7 @@ static void _register_client(int sd, short args, void *cbdata) goto cleanup; } nptr->nspace = strdup(cd->proc.nspace); - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + 
pmix_list_append(&pmix_globals.nspaces, &nptr->super); } /* setup a peer object for this client - since the host server * only deals with the original processes and not any clones, @@ -1051,7 +1051,7 @@ static void _register_client(int sd, short args, void *cbdata) * if the nspaces are all defined */ if (all_def) { /* so far, they have all been defined - check this one */ - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 < ns->nlocalprocs && 0 == strcmp(trk->pcs[i].nspace, ns->nspace)) { all_def = ns->all_registered; @@ -1166,7 +1166,7 @@ static void _deregister_client(int sd, short args, void *cbdata) /* see if we already have this nspace */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(tmp->nspace, cd->proc.nspace)) { nptr = tmp; break; @@ -1368,15 +1368,15 @@ static void _dmodex_req(int sd, short args, void *cbdata) PMIX_ACQUIRE_OBJECT(cd); pmix_output_verbose(2, pmix_server_globals.base_output, - "DMODX LOOKING FOR %s:%d", - cd->proc.nspace, cd->proc.rank); + "DMODX LOOKING FOR %s", + PMIX_NAME_PRINT(&cd->proc)); /* this should be one of my clients, but a race condition * could cause this request to arrive prior to us having * been informed of it - so first check to see if we know * about this nspace yet */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(ns->nspace, cd->proc.nspace)) { nptr = ns; break; @@ -1502,8 +1502,9 @@ PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, } pmix_output_verbose(2, pmix_server_globals.base_output, - "pmix:server dmodex request%s:%d", - proc->nspace, proc->rank); + "%s pmix:server dmodex request for proc %s", + PMIX_NAME_PRINT(&pmix_globals.myid), + 
PMIX_NAME_PRINT(proc)); cd = PMIX_NEW(pmix_setup_caddy_t); pmix_strncpy(cd->proc.nspace, proc->nspace, PMIX_MAX_NSLEN); diff --git a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_get.c b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_get.c index 56c05308014..c8fe13cdd6e 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_get.c +++ b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_get.c @@ -1,8 +1,8 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * Copyright (c) 2014-2015 Artem Y. Polyakov . * All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. @@ -51,6 +51,7 @@ #include "src/mca/gds/gds.h" #include "src/util/argv.h" #include "src/util/error.h" +#include "src/util/name_fns.h" #include "src/util/output.h" #include "src/util/pmix_environ.h" @@ -126,6 +127,7 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, pmix_dmdx_request_t *req; bool local; bool localonly = false; + bool diffnspace = false; struct timeval tv = {0, 0}; pmix_buffer_t pbkt, pkt; pmix_byte_object_t bo; @@ -133,10 +135,10 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, pmix_proc_t proc; char *data; size_t sz, n; - pmix_peer_t *peer; pmix_output_verbose(2, pmix_server_globals.get_output, - "recvd GET"); + "%s recvd GET", + PMIX_NAME_PRINT(&pmix_globals.myid)); /* setup */ memset(nspace, 0, sizeof(nspace)); @@ -191,13 +193,19 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, /* find the nspace object for this client */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == 
strcmp(nspace, ns->nspace)) { nptr = ns; break; } } + /* check if the nspace of the requestor is different from + * the nspace of the target process */ + if (!PMIX_CHECK_NSPACE(nspace, cd->peer->info->pname.nspace)) { + diffnspace = true; + } + pmix_output_verbose(2, pmix_server_globals.get_output, "%s:%d EXECUTE GET FOR %s:%d ON BEHALF OF %s:%d", pmix_globals.myid.nspace, @@ -294,10 +302,10 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, return PMIX_SUCCESS; } - /* this nspace is known, so we can process the request. - * if the rank is wildcard, then they are asking for the - * job-level info for this nspace - provide it */ - if (PMIX_RANK_WILDCARD == rank) { + /* the target nspace is known, so we can process the request. + * if the rank is wildcard, or the nspace is different, then + * they are asking for the job-level info for this nspace - provide it */ + if (PMIX_RANK_WILDCARD == rank || diffnspace) { /* see if we have the job-level info - we won't have it * if we have no local procs and haven't already asked * for it, so there is no guarantee we have it */ @@ -309,21 +317,32 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, * can retrieve the info from that GDS. Otherwise, * we need to retrieve it from our own */ PMIX_CONSTRUCT(&cb, pmix_cb_t); - peer = pmix_globals.mypeer; /* this data is for a local client, so give the gds the * option of returning a complete copy of the data, * or returning a pointer to local storage */ cb.proc = &proc; cb.scope = PMIX_SCOPE_UNDEF; cb.copy = false; - PMIX_GDS_FETCH_KV(rc, peer, &cb); + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); if (PMIX_SUCCESS != rc) { PMIX_DESTRUCT(&cb); return rc; } + /* if the requested rank is not WILDCARD, then retrieve the + * job-specific data for that rank - a scope of UNDEF + * will direct the GDS to provide it. 
Anything found will + * simply be added to the cb.kvs list */ + if (PMIX_RANK_WILDCARD != rank) { + proc.rank = rank; + PMIX_GDS_FETCH_KV(rc, pmix_globals.mypeer, &cb); + if (PMIX_SUCCESS != rc) { + PMIX_DESTRUCT(&cb); + return rc; + } + } PMIX_CONSTRUCT(&pkt, pmix_buffer_t); /* assemble the provided data into a byte object */ - PMIX_GDS_ASSEMB_KVS_REQ(rc, peer, &proc, &cb.kvs, &pkt, cd); + PMIX_GDS_ASSEMB_KVS_REQ(rc, pmix_globals.mypeer, &proc, &cb.kvs, &pkt, cd); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_DESTRUCT(&cb); @@ -333,7 +352,7 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, PMIX_DESTRUCT(&pkt); /* pack it into the payload */ PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); - PMIX_BFROPS_PACK(rc, cd->peer, &pbkt, &bo, 1, PMIX_BYTE_OBJECT); + PMIX_BFROPS_PACK(rc, pmix_globals.mypeer, &pbkt, &bo, 1, PMIX_BYTE_OBJECT); free(bo.bytes); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); @@ -604,6 +623,7 @@ static pmix_status_t _satisfy_request(pmix_namespace_t *nptr, pmix_rank_t rank, char *data = NULL; size_t sz = 0; pmix_scope_t scope = PMIX_SCOPE_UNDEF; + bool diffnspace = false; pmix_output_verbose(2, pmix_server_globals.get_output, "%s:%d SATISFY REQUEST CALLED", @@ -617,10 +637,18 @@ static pmix_status_t _satisfy_request(pmix_namespace_t *nptr, pmix_rank_t rank, PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); pmix_strncpy(proc.nspace, nptr->nspace, PMIX_MAX_NSLEN); - /* if we have local clients of this nspace, then we use - * the corresponding GDS to retrieve the data. 
Otherwise, - * the data will have been stored under our GDS */ - if (0 < nptr->nlocalprocs) { + if (!PMIX_CHECK_NSPACE(nptr->nspace, cd->peer->info->pname.nspace)) { + diffnspace = true; + } + + /* if rank is PMIX_RANK_UNDEF, then it was stored in our GDS */ + if (PMIX_RANK_UNDEF == rank) { + scope = PMIX_GLOBAL; // we have to search everywhere + peer = pmix_globals.mypeer; + } else if (0 < nptr->nlocalprocs) { + /* if we have local clients of this nspace, then we use + * the corresponding GDS to retrieve the data. Otherwise, + * the data will have been stored under our GDS */ if (local) { *local = true; } @@ -660,8 +688,7 @@ static pmix_status_t _satisfy_request(pmix_namespace_t *nptr, pmix_rank_t rank, /* if they are asking about a rank from an nspace different * from their own, or they gave a rank of "wildcard", then * include a copy of the job-level info */ - if (PMIX_RANK_WILDCARD == rank || - 0 != strncmp(nptr->nspace, cd->peer->info->pname.nspace, PMIX_MAX_NSLEN)) { + if (PMIX_RANK_WILDCARD == rank || diffnspace) { proc.rank = PMIX_RANK_WILDCARD; PMIX_CONSTRUCT(&cb, pmix_cb_t); /* this data is requested by a local client, so give the gds the option @@ -674,7 +701,7 @@ static pmix_status_t _satisfy_request(pmix_namespace_t *nptr, pmix_rank_t rank, if (PMIX_SUCCESS == rc) { PMIX_CONSTRUCT(&pkt, pmix_buffer_t); /* assemble the provided data into a byte object */ - PMIX_GDS_ASSEMB_KVS_REQ(rc, cd->peer, &proc, &cb.kvs, &pkt, cd); + PMIX_GDS_ASSEMB_KVS_REQ(rc, pmix_globals.mypeer, &proc, &cb.kvs, &pkt, cd); if (rc != PMIX_SUCCESS) { PMIX_ERROR_LOG(rc); PMIX_DESTRUCT(&pkt); @@ -720,7 +747,8 @@ static pmix_status_t _satisfy_request(pmix_namespace_t *nptr, pmix_rank_t rank, /* retrieve the data for the specific rank they are asking about */ if (PMIX_RANK_WILDCARD != rank) { - if (!PMIX_PROC_IS_SERVER(peer) && !peer->commit_cnt) { + if (!PMIX_PROC_IS_SERVER(peer) && 0 == peer->commit_cnt) { + PMIX_ERROR_LOG(PMIX_ERR_NOT_FOUND); /* this condition works only for local 
requests, server does * count commits for local ranks, and check this count when * local request. @@ -743,7 +771,11 @@ static pmix_status_t _satisfy_request(pmix_namespace_t *nptr, pmix_rank_t rank, found = true; PMIX_CONSTRUCT(&pkt, pmix_buffer_t); /* assemble the provided data into a byte object */ - PMIX_GDS_ASSEMB_KVS_REQ(rc, cd->peer, &proc, &cb.kvs, &pkt, cd); + if (PMIX_RANK_UNDEF == rank || diffnspace) { + PMIX_GDS_ASSEMB_KVS_REQ(rc, pmix_globals.mypeer, &proc, &cb.kvs, &pkt, cd); + } else { + PMIX_GDS_ASSEMB_KVS_REQ(rc, cd->peer, &proc, &cb.kvs, &pkt, cd); + } if (rc != PMIX_SUCCESS) { PMIX_ERROR_LOG(rc); PMIX_DESTRUCT(&pkt); @@ -789,6 +821,7 @@ static pmix_status_t _satisfy_request(pmix_namespace_t *nptr, pmix_rank_t rank, } PMIX_DESTRUCT(&cb); } + PMIX_UNLOAD_BUFFER(&pbkt, data, sz); PMIX_DESTRUCT(&pbkt); @@ -896,7 +929,7 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata) /* find the nspace object for the proc whose data is being received */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(caddy->lcd->proc.nspace, ns->nspace)) { nptr = ns; break; @@ -910,7 +943,7 @@ static void _process_dmdx_reply(int fd, short args, void *cbdata) nptr = PMIX_NEW(pmix_namespace_t); nptr->nspace = strdup(caddy->lcd->proc.nspace); /* add to the list */ - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, &nptr->super); } /* if the request was successfully satisfied, then store the data. 
diff --git a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c index 5f7ad645f86..37ec6c5b412 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server_ops.c @@ -425,7 +425,7 @@ static pmix_server_trkr_t* new_tracker(char *id, pmix_proc_t *procs, } /* is this nspace known to us? */ nptr = NULL; - PMIX_LIST_FOREACH(ns, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(ns, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(procs[i].nspace, ns->nspace)) { nptr = ns; break; @@ -583,10 +583,15 @@ pmix_status_t pmix_server_fence(pmix_server_caddy_t *cd, /* see if we are to collect data or enforce a timeout - we don't internally care * about any other directives */ for (n=0; n < ninfo; n++) { - if (0 == strcmp(info[n].key, PMIX_COLLECT_DATA)) { - collect_data = true; - } else if (0 == strncmp(info[n].key, PMIX_TIMEOUT, PMIX_MAX_KEYLEN)) { - tv.tv_sec = info[n].value.data.uint32; + if (PMIX_CHECK_KEY(&info[n], PMIX_COLLECT_DATA)) { + collect_data = PMIX_INFO_TRUE(&info[n]); + } else if (PMIX_CHECK_KEY(&info[n], PMIX_TIMEOUT)) { + PMIX_VALUE_GET_NUMBER(rc, &info[n].value, tv.tv_sec, uint32_t); + if (PMIX_SUCCESS != rc) { + PMIX_PROC_FREE(procs, nprocs); + PMIX_INFO_FREE(info, ninfo); + return rc; + } } } } @@ -2738,7 +2743,7 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, for (n=0; n < cd->ntargets; n++) { /* find the nspace of this proc */ nptr = NULL; - PMIX_LIST_FOREACH(tmp, &pmix_server_globals.nspaces, pmix_namespace_t) { + PMIX_LIST_FOREACH(tmp, &pmix_globals.nspaces, pmix_namespace_t) { if (0 == strcmp(tmp->nspace, cd->targets[n].nspace)) { nptr = tmp; break; @@ -2751,7 +2756,7 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, goto exit; } nptr->nspace = strdup(cd->targets[n].nspace); - pmix_list_append(&pmix_server_globals.nspaces, &nptr->super); + pmix_list_append(&pmix_globals.nspaces, 
&nptr->super); } /* if the rank is wildcard, then we use the epilog for the nspace */ if (PMIX_RANK_WILDCARD == cd->targets[n].rank) { @@ -2805,7 +2810,7 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, cnt = 0; // track how many infos are cleanup related for (n=0; n < cd->ninfo; n++) { - if (0 == strncmp(cd->info[n].key, PMIX_REGISTER_CLEANUP, PMIX_MAX_KEYLEN)) { + if (PMIX_CHECK_KEY(&cd->info[n], PMIX_REGISTER_CLEANUP)) { ++cnt; if (PMIX_STRING != cd->info[n].value.type || NULL == cd->info[n].value.data.string) { @@ -2821,7 +2826,7 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, } cf->path = strdup(cd->info[n].value.data.string); pmix_list_append(&cachefiles, &cf->super); - } else if (0 == strncmp(cd->info[n].key, PMIX_REGISTER_CLEANUP_DIR, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&cd->info[n], PMIX_REGISTER_CLEANUP_DIR)) { ++cnt; if (PMIX_STRING != cd->info[n].value.type || NULL == cd->info[n].value.data.string) { @@ -2837,10 +2842,10 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, } cdir->path = strdup(cd->info[n].value.data.string); pmix_list_append(&cachedirs, &cdir->super); - } else if (0 == strncmp(cd->info[n].key, PMIX_CLEANUP_RECURSIVE, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&cd->info[n], PMIX_CLEANUP_RECURSIVE)) { recurse = PMIX_INFO_TRUE(&cd->info[n]); ++cnt; - } else if (0 == strncmp(cd->info[n].key, PMIX_CLEANUP_IGNORE, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&cd->info[n], PMIX_CLEANUP_IGNORE)) { if (PMIX_STRING != cd->info[n].value.type || NULL == cd->info[n].value.data.string) { /* return an error */ @@ -2856,7 +2861,7 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer, cf->path = strdup(cd->info[n].value.data.string); pmix_list_append(&ignorefiles, &cf->super); ++cnt; - } else if (0 == strncmp(cd->info[n].key, PMIX_CLEANUP_LEAVE_TOPDIR, PMIX_MAX_KEYLEN)) { + } else if (PMIX_CHECK_KEY(&cd->info[n], PMIX_CLEANUP_LEAVE_TOPDIR)) { leave_topdir = PMIX_INFO_TRUE(&cd->info[n]); ++cnt; } @@ -3274,8 
+3279,7 @@ pmix_status_t pmix_server_iofreg(pmix_peer_t *peer, continue; } /* do we already have this source for this peer? */ - if (0 == strncmp(cd->procs[n].nspace, req->pname.nspace, PMIX_MAX_NSLEN) && - (PMIX_RANK_WILDCARD == req->pname.rank || cd->procs[n].rank == req->pname.rank)) { + if (PMIX_CHECK_PROCID(&cd->procs[n], &req->pname)) { match = true; if ((req->channels & cd->channels) != cd->channels) { /* this is a channel update */ diff --git a/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c b/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c index d2b9c9acbe5..585ea08fe49 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c +++ b/opal/mca/pmix/pmix3x/pmix/src/tool/pmix_tool.c @@ -50,7 +50,9 @@ #endif /* HAVE_DIRENT_H */ #include PMIX_EVENT_HEADER +#if ! PMIX_HAVE_LIBEV #include PMIX_EVENT2_THREAD_HEADER +#endif #include "src/class/pmix_list.h" #include "src/util/argv.h" @@ -789,7 +791,7 @@ pmix_status_t pmix_tool_init_info(void) pmix_kval_t *kptr; pmix_status_t rc; pmix_proc_t wildcard; - char hostname[PMIX_MAX_NSLEN]; + char hostname[PMIX_MAXHOSTNAMELEN] = {0}; pmix_strncpy(wildcard.nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN); wildcard.rank = pmix_globals.myid.rank; @@ -1027,9 +1029,9 @@ pmix_status_t pmix_tool_init_info(void) /* hostname */ if (NULL != pmix_globals.hostname) { - pmix_strncpy(hostname, pmix_globals.hostname, PMIX_MAX_NSLEN); + pmix_strncpy(hostname, pmix_globals.hostname, PMIX_MAXHOSTNAMELEN); } else { - gethostname(hostname, PMIX_MAX_NSLEN); + gethostname(hostname, PMIX_MAXHOSTNAMELEN-1); } kptr = PMIX_NEW(pmix_kval_t); kptr->key = strdup(PMIX_HOSTNAME); @@ -1227,7 +1229,6 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void) PMIX_LIST_DESTRUCT(&pmix_server_globals.local_reqs); PMIX_LIST_DESTRUCT(&pmix_server_globals.gdata); PMIX_LIST_DESTRUCT(&pmix_server_globals.events); - PMIX_LIST_DESTRUCT(&pmix_server_globals.nspaces); PMIX_LIST_DESTRUCT(&pmix_server_globals.iof); } diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/compress.c 
b/opal/mca/pmix/pmix3x/pmix/src/util/compress.c index 867a3d5e57d..d71cdf37c63 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/compress.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * @@ -31,6 +31,7 @@ bool pmix_util_compress_string(char *instring, size_t len, outlen; uint8_t *tmp, *ptr; uint32_t inlen; + int rc; /* set default output */ *outbytes = NULL; @@ -43,7 +44,6 @@ bool pmix_util_compress_string(char *instring, /* get an upper bound on the required output storage */ len = deflateBound(&strm, inlen); if (NULL == (tmp = (uint8_t*)malloc(len))) { - *outbytes = NULL; return false; } strm.next_in = (uint8_t*)instring; @@ -54,8 +54,12 @@ bool pmix_util_compress_string(char *instring, strm.avail_out = len; strm.next_out = tmp; - deflate (&strm, Z_FINISH); + rc = deflate (&strm, Z_FINISH); deflateEnd (&strm); + if (Z_OK != rc) { + free(tmp); + return false; + } /* allocate 4 bytes beyond the size reqd by zlib so we * can pass the size of the uncompressed string to the diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/name_fns.c b/opal/mca/pmix/pmix3x/pmix/src/util/name_fns.c index 14f19aef022..96b46ea9d58 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/name_fns.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/name_fns.c @@ -12,7 +12,7 @@ * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -98,6 +98,7 @@ char* pmix_util_print_name_args(const pmix_proc_t *name) { pmix_print_args_buffers_t *ptr; char *rank; + int index; /* get the next buffer */ ptr = get_print_name_buffer(); @@ -105,29 +106,36 @@ char* pmix_util_print_name_args(const pmix_proc_t *name) PMIX_ERROR_LOG(PMIX_ERR_OUT_OF_RESOURCE); return pmix_print_args_null; } - /* cycle around the ring */ - if (PMIX_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { - ptr->cntr = 0; - } /* protect against NULL names */ if (NULL == name) { - snprintf(ptr->buffers[ptr->cntr++], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "[NO-NAME]"); - return ptr->buffers[ptr->cntr-1]; + index = ptr->cntr; + snprintf(ptr->buffers[index], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "[NO-NAME]"); + ptr->cntr++; + if (PMIX_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { + ptr->cntr = 0; + } + return ptr->buffers[index]; } rank = pmix_util_print_rank(name->rank); - snprintf(ptr->buffers[ptr->cntr++], + index = ptr->cntr; + snprintf(ptr->buffers[index], PMIX_PRINT_NAME_ARGS_MAX_SIZE, - "[%s,%s]", name->nspace, rank); + "[%s:%s]", name->nspace, rank); + ptr->cntr++; + if (PMIX_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { + ptr->cntr = 0; + } - return ptr->buffers[ptr->cntr-1]; + return ptr->buffers[index]; } char* pmix_util_print_rank(const pmix_rank_t vpid) { pmix_print_args_buffers_t *ptr; + int index; ptr = get_print_name_buffer(); @@ -136,19 +144,19 @@ char* pmix_util_print_rank(const pmix_rank_t vpid) return pmix_print_args_null; } - /* cycle around the ring */ - if (PMIX_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { - ptr->cntr = 0; - } - + index = ptr->cntr; if (PMIX_RANK_UNDEF == vpid) { - snprintf(ptr->buffers[ptr->cntr++], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "UNDEF"); + snprintf(ptr->buffers[index], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "UNDEF"); } else if (PMIX_RANK_WILDCARD == vpid) { - snprintf(ptr->buffers[ptr->cntr++], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "WILDCARD"); + snprintf(ptr->buffers[index], PMIX_PRINT_NAME_ARGS_MAX_SIZE, 
"WILDCARD"); } else { - snprintf(ptr->buffers[ptr->cntr++], + snprintf(ptr->buffers[index], PMIX_PRINT_NAME_ARGS_MAX_SIZE, "%ld", (long)vpid); } - return ptr->buffers[ptr->cntr-1]; + ptr->cntr++; + if (PMIX_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { + ptr->cntr = 0; + } + return ptr->buffers[index]; } diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/output.c b/opal/mca/pmix/pmix3x/pmix/src/util/output.c index cf73f507008..4e90280c8ae 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/output.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/output.c @@ -125,7 +125,7 @@ PMIX_CLASS_INSTANCE(pmix_output_stream_t, pmix_object_t, construct, destruct); bool pmix_output_init(void) { int i; - char hostname[PMIX_MAXHOSTNAMELEN]; + char hostname[PMIX_MAXHOSTNAMELEN] = {0}; char *str; if (initialized) { @@ -176,7 +176,7 @@ bool pmix_output_init(void) } else { verbose.lds_want_stderr = true; } - gethostname(hostname, sizeof(hostname)); + gethostname(hostname, sizeof(hostname)-1); hostname[sizeof(hostname)-1] = '\0'; if (0 > asprintf(&verbose.lds_prefix, "[%s:%05d] ", hostname, getpid())) { return PMIX_ERR_NOMEM; @@ -256,7 +256,7 @@ bool pmix_output_switch(int output_id, bool enable) void pmix_output_reopen_all(void) { char *str; - char hostname[PMIX_MAXHOSTNAMELEN]; + char hostname[PMIX_MAXHOSTNAMELEN] = {0}; str = getenv("PMIX_OUTPUT_STDERR_FD"); if (NULL != str) { diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/pif.h b/opal/mca/pmix/pmix3x/pmix/src/util/pif.h index fb9f1b79a24..57ed1bfd749 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/pif.h +++ b/opal/mca/pmix/pmix3x/pmix/src/util/pif.h @@ -13,7 +13,7 @@ * reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -38,9 +38,7 @@ #include #endif -#ifndef IF_NAMESIZE -#define IF_NAMESIZE 32 -#endif +#define PMIX_IF_NAMESIZE 256 BEGIN_C_DECLS diff --git a/opal/mca/pmix/pmix3x/pmix/src/util/pmix_environ.c b/opal/mca/pmix/pmix3x/pmix/src/util/pmix_environ.c index 1e1cfaaa880..2662a86bff7 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/util/pmix_environ.c +++ b/opal/mca/pmix/pmix3x/pmix/src/util/pmix_environ.c @@ -12,8 +12,10 @@ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. + * Copyright (c) 2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,10 +32,12 @@ #include #include "src/util/printf.h" +#include "src/util/error.h" #include "src/util/argv.h" #include "src/util/pmix_environ.h" #define PMIX_DEFAULT_TMPDIR "/tmp" +#define PMIX_MAX_ENVAR_LENGTH 100000 /* * Merge two environ-like char arrays, ensuring that there are no @@ -74,7 +78,7 @@ char **pmix_environ_merge(char **minor, char **major) pmix_setenv(minor[i], NULL, false, &ret); } else { - /* strdup minor[i] in case it's a constat string */ + /* strdup minor[i] in case it's a constant string */ name = strdup(minor[i]); value = name + (value - minor[i]); @@ -99,9 +103,60 @@ char **pmix_environ_merge(char **minor, char **major) int i; char *newvalue, *compare; size_t len; + bool valid; - /* Make the new value */ + /* Check the bozo case */ + if( NULL == env ) { + return PMIX_ERR_BAD_PARAM; + } + if (NULL != value) { + /* check the string for unacceptable length - i.e., ensure + * it is NULL-terminated */ + valid = false; + for (i=0; i < PMIX_MAX_ENVAR_LENGTH; i++) { + if ('\0' == value[i]) { + valid = 
true; + break; + } + } + if (!valid) { + PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); + return PMIX_ERR_BAD_PARAM; + } + } + + /* If this is the "environ" array, use putenv or setenv */ + if (*env == environ) { + /* THIS IS POTENTIALLY A MEMORY LEAK! But I am doing it + so that we don't violate the law of least + astonishment for PMIX developers (i.e., those that don't + check the return code of pmix_setenv() and notice that we + returned an error if you passed in the real environ) */ +#if defined (HAVE_SETENV) + if (NULL == value) { + /* this is actually an unsetenv request */ + unsetenv(name); + } else { + setenv(name, value, overwrite); + } +#else + /* Make the new value */ + if (NULL == value) { + i = asprintf(&newvalue, "%s=", name); + } else { + i = asprintf(&newvalue, "%s=%s", name, value); + } + if (NULL == newvalue || 0 > i) { + return PMIX_ERR_OUT_OF_RESOURCE; + } + putenv(newvalue); + /* cannot free it as putenv doesn't copy the value */ +#endif + return PMIX_SUCCESS; + } + + /* Make the new value */ if (NULL == value) { i = asprintf(&newvalue, "%s=", name); } else { @@ -111,28 +166,13 @@ char **pmix_environ_merge(char **minor, char **major) return PMIX_ERR_OUT_OF_RESOURCE; } - /* Check the bozo case */ - - if( NULL == env ) { - return PMIX_ERR_BAD_PARAM; - } else if (NULL == *env) { + if (NULL == *env) { i = 0; pmix_argv_append(&i, env, newvalue); free(newvalue); return PMIX_SUCCESS; } - /* If this is the "environ" array, use putenv */ - if( *env == environ ) { - /* THIS IS POTENTIALLY A MEMORY LEAK!
But I am doing it - because so that we don't violate the law of least - astonishmet for PMIX developers (i.e., those that don't - check the return code of pmix_setenv() and notice that we - returned an error if you passed in the real environ) */ - putenv(newvalue); - return PMIX_SUCCESS; - } - /* Make something easy to compare to */ i = asprintf(&compare, "%s=", name); diff --git a/opal/mca/pmix/pmix3x/pmix/test/cli_stages.c b/opal/mca/pmix/pmix3x/pmix/test/cli_stages.c index ecd41c2bd42..5fbfec419dc 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/cli_stages.c +++ b/opal/mca/pmix/pmix3x/pmix/test/cli_stages.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2018 Mellanox Technologies, Inc. @@ -58,7 +58,7 @@ void cli_init(int nprocs) } } -void cli_connect(cli_info_t *cli, int sd, struct event_base * ebase, event_callback_fn callback) +void cli_connect(cli_info_t *cli, int sd, pmix_event_base_t * ebase, event_callback_fn callback) { if( CLI_CONNECTED != cli->next_state[cli->state] ){ TEST_ERROR(("Rank %d has bad next state: expect %d have %d!", @@ -68,9 +68,9 @@ void cli_connect(cli_info_t *cli, int sd, struct event_base * ebase, event_callb } cli->sd = sd; - cli->ev = event_new(ebase, sd, - EV_READ|EV_PERSIST, callback, cli); - event_add(cli->ev,NULL); + cli->ev = pmix_event_new(ebase, sd, + EV_READ|EV_PERSIST, callback, cli); + pmix_event_add(cli->ev,NULL); pmix_ptl_base_set_nonblocking(sd); TEST_VERBOSE(("Connection accepted from rank %d", cli_rank(cli) )); cli->state = CLI_CONNECTED; @@ -105,12 +105,12 @@ void cli_disconnect(cli_info_t *cli) } if( NULL == cli->ev ){ - TEST_ERROR(("Bad ev = NULL of rank = %d ", cli->sd, cli_rank(cli))); + TEST_ERROR(("Bad ev = NULL of rank = %d ", 
cli_rank(cli))); test_abort = true; } else { TEST_VERBOSE(("remove event of rank %d from event queue", cli_rank(cli))); - event_del(cli->ev); - event_free(cli->ev); + pmix_event_del(cli->ev); + pmix_event_free(cli->ev); cli->ev = NULL; } diff --git a/opal/mca/pmix/pmix3x/pmix/test/cli_stages.h b/opal/mca/pmix/pmix3x/pmix/test/cli_stages.h index b716480b0a9..011023d7a79 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/cli_stages.h +++ b/opal/mca/pmix/pmix3x/pmix/test/cli_stages.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2018 Mellanox Technologies, Inc. @@ -56,7 +56,7 @@ extern bool test_abort; int cli_rank(cli_info_t *cli); void cli_init(int nprocs); -void cli_connect(cli_info_t *cli, int sd, struct event_base * ebase, event_callback_fn callback); +void cli_connect(cli_info_t *cli, int sd, pmix_event_base_t * ebase, event_callback_fn callback); void cli_finalize(cli_info_t *cli); void cli_disconnect(cli_info_t *cli); void cli_terminate(cli_info_t *cli); diff --git a/opal/mca/pmix/pmix3x/pmix/test/pmi_client.c b/opal/mca/pmix/pmix3x/pmix/test/pmi_client.c index ad21f6db3cc..819429b4fd7 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/pmi_client.c +++ b/opal/mca/pmix/pmix3x/pmix/test/pmi_client.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ @@ -24,6 +24,9 @@ static int _legacy = 0; /* Verbose level 0-silent, 1-fatal, 2-error, 3+ debug*/ static int _verbose = 1; +static int spawned, size, rank=-1, appnum; +static char jobid[255]; + static void log_fatal(const char *format, ...) 
{ va_list arglist; @@ -36,7 +39,7 @@ static void log_fatal(const char *format, ...) va_end(arglist); return; } - fprintf(stderr, "FATAL: %s", output); + fprintf(stderr, "%d:FATAL: %s", rank, output); free(output); } va_end(arglist); @@ -54,7 +57,7 @@ static void log_error(const char *format, ...) va_end(arglist); return; } - fprintf(stderr, "ERROR: %s", output); + fprintf(stderr, "%d:ERROR: %s", rank, output); free(output); } va_end(arglist); @@ -72,7 +75,7 @@ static void log_info(const char *format, ...) va_end(arglist); return; } - fprintf(stderr, "INFO: %s", output); + fprintf(stderr, "%d:INFO: %s", rank, output); free(output); } va_end(arglist); @@ -81,7 +84,7 @@ static void log_info(const char *format, ...) #define log_assert(e, msg) \ do { \ if (!(e)) { \ - log_fatal("%s at %s:%d\n", msg, __func__, __LINE__); \ + log_fatal("%d:%s at %s:%d\n", rank, msg, __func__, __LINE__); \ rc = -1; \ } \ } while (0) @@ -99,10 +102,6 @@ static int test_item5(void); static int test_item6(void); static int test_item7(void); -static int spawned, size, rank, appnum; -static char jobid[255]; - - int main(int argc, char **argv) { int ret = 0; @@ -372,21 +371,24 @@ static int test_item6(void) { int rc = 0; char val[100]; - const char *tkey = __func__; + char *tkey; const char *tval = __FILE__; + asprintf(&tkey, "%d:%s", rank, __func__); if (PMI_SUCCESS != (rc = PMI_KVS_Put(jobid, tkey, tval))) { log_fatal("PMI_KVS_Put %d\n", rc); + free(tkey); return rc; } if (PMI_SUCCESS != (rc = PMI_KVS_Get(jobid, tkey, val, sizeof(val)))) { log_fatal("PMI_KVS_Get %d\n", rc); + free(tkey); return rc; } log_info("tkey=%s tval=%s val=%s\n", tkey, tval, val); - + free(tkey); log_assert(!strcmp(tval, val), "value does not meet expectation"); return rc; @@ -398,16 +400,16 @@ static int test_item7(void) char tkey[100]; char tval[100]; char val[100]; - int i = 0; + int i = 0, j; + +log_info("TEST7\n"); for (i = 0; i < size; i++) { - sprintf(tkey, "KEY-%d", i); + sprintf(tkey, "%d:KEY-%d", rank, i); 
sprintf(tval, "VALUE-%d", i); - if (i == rank) { - if (PMI_SUCCESS != (rc = PMI_KVS_Put(jobid, tkey, tval))) { - log_fatal("PMI_KVS_Put [%s=%s] %d\n", tkey, tval, rc); - return rc; - } + if (PMI_SUCCESS != (rc = PMI_KVS_Put(jobid, tkey, tval))) { + log_fatal("PMI_KVS_Put [%s=%s] %d\n", tkey, tval, rc); + return rc; } } @@ -416,22 +418,27 @@ static int test_item7(void) return rc; } + + log_info("BARRIER\n"); if (PMI_SUCCESS != (rc = PMI_Barrier())) { log_fatal("PMI_Barrier %d\n", rc); return rc; } for (i = 0; i < size; i++) { - sprintf(tkey, "KEY-%d", i); - sprintf(tval, "VALUE-%d", i); - if (PMI_SUCCESS != (rc = PMI_KVS_Get(jobid, tkey, val, sizeof(val)))) { - log_fatal("PMI_KVS_Get [%s=?] %d\n", tkey, rc); - return rc; - } + for (j=0; j < size; j++) { + sprintf(tkey, "%d:KEY-%d", i, j); + sprintf(tval, "VALUE-%d", j); + log_info("Get key %s\n", tkey); + if (PMI_SUCCESS != (rc = PMI_KVS_Get(jobid, tkey, val, sizeof(val)))) { + log_fatal("PMI_KVS_Get [%s=?] %d\n", tkey, rc); + return rc; + } - log_info("tkey=%s tval=%s val=%s\n", tkey, tval, val); + log_info("tkey=%s tval=%s val=%s\n", tkey, tval, val); - log_assert(!strcmp(tval, val), "value does not meet expectation"); + log_assert(!strcmp(tval, val), "value does not meet expectation"); + } } return rc; diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/Makefile.am b/opal/mca/pmix/pmix3x/pmix/test/simple/Makefile.am index 8ee50d6ca8a..5ab9f568bb0 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/Makefile.am +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. # Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -85,7 +85,7 @@ simplegacy_SOURCES = $(headers) \ simplegacy.c simplegacy_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS) simplegacy_LDADD = \ - $(top_builddir)/src/libpmix.la + $(top_builddir)/src/libpmi.la simptimeout_SOURCES = $(headers) \ simptimeout.c diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/gwtest.c b/opal/mca/pmix/pmix3x/pmix/test/simple/gwtest.c index 2f1fae47017..3d9f8ee8d7d 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/gwtest.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/gwtest.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
@@ -392,9 +392,9 @@ int main(int argc, char **argv) /* setup to see sigchld on the forked tests */ PMIX_CONSTRUCT(&children, pmix_list_t); - event_assign(&handler, pmix_globals.evbase, SIGCHLD, - EV_SIGNAL|EV_PERSIST,wait_signal_callback, &handler); - event_add(&handler, NULL); + pmix_event_assign(&handler, pmix_globals.evbase, SIGCHLD, + EV_SIGNAL|EV_PERSIST,wait_signal_callback, &handler); + pmix_event_add(&handler, NULL); /* we have a single namespace for all clients */ atmp = NULL; @@ -1023,7 +1023,7 @@ static void wait_signal_callback(int fd, short event, void *arg) pid_t pid; wait_tracker_t *t2; - if (SIGCHLD != event_get_signal(sig)) { + if (SIGCHLD != pmix_event_get_signal(sig)) { return; } diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/quietclient.c b/opal/mca/pmix/pmix3x/pmix/test/simple/quietclient.c index 428ba4e341b..d91e7e58e35 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/quietclient.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/quietclient.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -103,12 +103,15 @@ int main(int argc, char **argv) pmix_value_t *val = &value; char *tmp; pmix_proc_t proc; - uint32_t nprocs, n; + uint32_t nprocs, n, k, nlocal; int cnt, j; volatile bool active; pmix_info_t *iptr; size_t ninfo; pmix_status_t code; + char **peers; + bool all_local, local; + pmix_rank_t *locals = NULL; /* init us and declare we are a test programming model */ PMIX_INFO_CREATE(iptr, 2); @@ -152,11 +155,11 @@ int main(int argc, char **argv) usleep(10); } - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %s", + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } @@ -173,6 +176,27 @@ int main(int argc, char **argv) goto done; } + /* get a list of our local peers */ + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_PEERS, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get local peers failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + goto done; + } + /* split the returned string to get the rank of each local peer */ + peers = pmix_argv_split(val->data.string, ','); + PMIX_VALUE_RELEASE(val); + nlocal = pmix_argv_count(peers); + if (nprocs == nlocal) { + all_local = true; + } else { + all_local = false; + locals = (pmix_rank_t*)malloc(pmix_argv_count(peers) * sizeof(pmix_rank_t)); + for (cnt=0; NULL != peers[cnt]; cnt++) { + locals[cnt] = strtoul(peers[cnt], NULL, 10); + } + } + pmix_argv_free(peers); + for (cnt=0; cnt < MAXCNT; cnt++) { (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, myproc.rank, cnt); value.type = PMIX_UINT64; @@ -213,42 +237,67 @@ int main(int argc, char **argv) for (j=0; 
j <= cnt; j++) { for (n=0; n < nprocs; n++) { proc.rank = n; - (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, n, j); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", - myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); - continue; - } - if (NULL == val) { - pmix_output(0, "Client ns %s rank %d: NULL value returned", - myproc.nspace, myproc.rank); - break; - } - if (PMIX_UINT64 != val->type) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); - PMIX_VALUE_RELEASE(val); - free(tmp); - continue; + if (all_local) { + local = true; + } else { + local = false; + /* see if this proc is local to us */ + for (k=0; k < nlocal; k++) { + if (proc.rank == locals[k]) { + local = true; + break; + } + } } - if (1234 != val->data.uint64) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); + if (local) { + (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, n, j); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", + myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); + continue; + } + if (NULL == val) { + pmix_output(0, "Client ns %s rank %d: NULL value returned", + myproc.nspace, myproc.rank); + break; + } + if (PMIX_UINT64 != val->type) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); + PMIX_VALUE_RELEASE(val); + free(tmp); + continue; + } + if (1234 != val->data.uint64) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); + PMIX_VALUE_RELEASE(val); + free(tmp); + continue; + } PMIX_VALUE_RELEASE(val); free(tmp); - continue; 
- } - PMIX_VALUE_RELEASE(val); - free(tmp); - if (n != myproc.rank) { + /* now check that we don't get data for a remote proc - note that we + * always can get our own remote data as we published it */ + if (proc.rank != myproc.rank) { + (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); + if (PMIX_SUCCESS == (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + /* this data should _not_ be found as we are on the same node + * and the data was "put" with a PMIX_REMOTE scope */ + pmix_output(0, "ERROR: Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", + myproc.nspace, myproc.rank, j, tmp); + } + PMIX_VALUE_RELEASE(val); + free(tmp); + } + } else { (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - /* this data should _not_ be found as we are on the same node - * and the data was "put" with a PMIX_REMOTE scope */ - continue; + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed for remote proc", + myproc.nspace, myproc.rank, j, tmp); + } + if (NULL != val) { + PMIX_VALUE_RELEASE(val); } - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", - myproc.nspace, myproc.rank, j, tmp); - PMIX_VALUE_RELEASE(val); free(tmp); } } diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simpclient.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simpclient.c index ae5b4ababc4..80aea143083 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simpclient.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simpclient.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -127,13 +127,16 @@ int main(int argc, char **argv) pmix_value_t *val = &value; char *tmp; pmix_proc_t proc; - uint32_t nprocs, n; + uint32_t nprocs, n, k, nlocal; int cnt, j; bool doabort = false; volatile bool active; pmix_info_t info, *iptr; size_t ninfo; pmix_status_t code; + char **peers; + bool all_local, local; + pmix_rank_t *locals = NULL; if (1 < argc) { if (0 == strcmp("-abort", argv[1])) { @@ -185,17 +188,17 @@ int main(int argc, char **argv) } - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %s", + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); /* put a few values */ (void)asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank); @@ -207,6 +210,27 @@ int main(int argc, char **argv) goto done; } + /* get a list of our local peers */ + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_PEERS, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get local peers failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + goto done; + } + /* split the returned string to get the rank of each local peer */ + peers = pmix_argv_split(val->data.string, ','); + PMIX_VALUE_RELEASE(val); + nlocal = pmix_argv_count(peers); + if (nprocs == nlocal) { + all_local = true; + } else { + all_local = false; + locals = (pmix_rank_t*)malloc(pmix_argv_count(peers) * sizeof(pmix_rank_t)); + for (cnt=0; NULL != 
peers[cnt]; cnt++) { + locals[cnt] = strtoul(peers[cnt], NULL, 10); + } + } + pmix_argv_free(peers); + for (cnt=0; cnt < MAXCNT; cnt++) { (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, myproc.rank, cnt); value.type = PMIX_UINT64; @@ -247,43 +271,71 @@ int main(int argc, char **argv) for (j=0; j <= cnt; j++) { for (n=0; n < nprocs; n++) { proc.rank = n; - (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, n, j); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", - myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); - continue; - } - if (NULL == val) { - pmix_output(0, "Client ns %s rank %d: NULL value returned", - myproc.nspace, myproc.rank); - break; - } - if (PMIX_UINT64 != val->type) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); - PMIX_VALUE_RELEASE(val); - free(tmp); - continue; + if (all_local) { + local = true; + } else { + local = false; + /* see if this proc is local to us */ + for (k=0; k < nlocal; k++) { + if (proc.rank == locals[k]) { + local = true; + break; + } + } } - if (1234 != val->data.uint64) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); + if (local) { + (void)asprintf(&tmp, "%s-%d-local-%d", myproc.nspace, n, j); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", + myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); + continue; + } + if (NULL == val) { + pmix_output(0, "Client ns %s rank %d: NULL value returned", + myproc.nspace, myproc.rank); + break; + } + if (PMIX_UINT64 != val->type) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); + PMIX_VALUE_RELEASE(val); + free(tmp); 
+ continue; + } + if (1234 != val->data.uint64) { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); + PMIX_VALUE_RELEASE(val); + free(tmp); + continue; + } + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); PMIX_VALUE_RELEASE(val); free(tmp); - continue; - } - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); - PMIX_VALUE_RELEASE(val); - free(tmp); - if (n != myproc.rank) { + /* now check that we don't get data for a remote proc - note that we + * always can get our own remote data as we published it */ + if (proc.rank != myproc.rank) { + (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { + /* this data should _not_ be found as we are on the same node + * and the data was "put" with a PMIX_REMOTE scope */ + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); + } else { + pmix_output(0, "ERROR: Client ns %s rank %d cnt %d: PMIx_Get %s returned remote data for a local proc", + myproc.nspace, myproc.rank, j, tmp); + } + if (NULL != val) { + PMIX_VALUE_RELEASE(val); + } + free(tmp); + } + } else { (void)asprintf(&tmp, "%s-%d-remote-%d", proc.nspace, n, j); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - /* this data should _not_ be found as we are on the same node - * and the data was "put" with a PMIX_REMOTE scope */ + if (PMIX_SUCCESS == (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); - continue; + } else { + pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed for remote proc", + myproc.nspace, myproc.rank, j, tmp); } - pmix_output(0, "Client ns %s rank %d cnt %d: 
PMIx_Get %s returned remote data for a local proc", - myproc.nspace, myproc.rank, j, tmp); PMIX_VALUE_RELEASE(val); free(tmp); } @@ -321,7 +373,7 @@ int main(int argc, char **argv) /* log something */ PMIX_INFO_CONSTRUCT(&info); - PMIX_INFO_LOAD(&info, PMIX_LOG_STDERR, "test log msg", PMIX_STRING); + PMIX_INFO_LOAD(&info, PMIX_LOG_STDERR, "test log msg\n", PMIX_STRING); active = true; rc = PMIx_Log_nb(&info, 1, NULL, 0, opcbfunc, (void*)&active); if (PMIX_SUCCESS != rc) { diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simpdie.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simpdie.c index fd6a61eba4b..cb0ae490227 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simpdie.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simpdie.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -97,16 +97,17 @@ int main(int argc, char **argv) } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); completed = false; /* register our errhandler */ diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simpdmodex.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simpdmodex.c index c042f1948a8..2a7e067d148 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simpdmodex.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simpdmodex.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -98,9 +98,12 @@ int main(int argc, char **argv) pmix_value_t *val = &value; char *tmp; pmix_proc_t proc; - uint32_t n, num_gets; + uint32_t n, num_gets, k, nlocal; bool active; bool dofence = true; + bool local, all_local; + char **peers; + pmix_rank_t *locals; if (NULL != getenv("PMIX_SIMPDMODEX_ASYNC")) { dofence = false; @@ -113,16 +116,17 @@ int main(int argc, char **argv) } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); /* put a few values */ (void)asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank); @@ -174,24 +178,60 @@ int main(int argc, char **argv) } } + /* get a list of our local peers */ + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_PEERS, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get local peers failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + goto done; + } + /* split the returned string to get the rank of each local peer */ + peers = pmix_argv_split(val->data.string, ','); + PMIX_VALUE_RELEASE(val); + nlocal = pmix_argv_count(peers); + if (nprocs == nlocal) { + all_local = true; + } else { + all_local = false; + locals = (pmix_rank_t*)malloc(pmix_argv_count(peers) * sizeof(pmix_rank_t)); 
+ for (n=0; NULL != peers[n]; n++) { + locals[n] = strtoul(peers[n], NULL, 10); + } + } + pmix_argv_free(peers); + /* get the committed data - ask for someone who doesn't exist as well */ num_gets = 0; for (n=0; n < nprocs; n++) { - (void)asprintf(&tmp, "%s-%d-local", myproc.nspace, n); - proc.rank = n; - if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp, - NULL, 0, valcbfunc, tmp))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc); - goto done; + if (all_local) { + local = true; + } else { + local = false; + /* see if this proc is local to us */ + for (k=0; k < nlocal; k++) { + if (proc.rank == locals[k]) { + local = true; + break; + } + } } - ++num_gets; - (void)asprintf(&tmp, "%s-%d-remote", myproc.nspace, n); - if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp, - NULL, 0, valcbfunc, tmp))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc); - goto done; + if (local) { + (void)asprintf(&tmp, "%s-%d-local", myproc.nspace, n); + proc.rank = n; + if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp, + NULL, 0, valcbfunc, tmp))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc); + goto done; + } + ++num_gets; + } else { + (void)asprintf(&tmp, "%s-%d-remote", myproc.nspace, n); + if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&proc, tmp, + NULL, 0, valcbfunc, tmp))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get %s failed: %d", myproc.nspace, n, tmp, rc); + goto done; + } + ++num_gets; } - ++num_gets; } if (dofence) { diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simpdyn.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simpdyn.c index a20b8418a4f..ef5286dd6e5 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simpdyn.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simpdyn.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. 
- * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -62,16 +62,17 @@ int main(int argc, char **argv) } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); /* call fence to sync */ PMIX_PROC_CONSTRUCT(&proc); @@ -85,19 +86,12 @@ int main(int argc, char **argv) /* rank=0 calls spawn */ if (0 == myproc.rank) { PMIX_APP_CREATE(app, 1); - app->cmd = strdup("gumby"); + app->cmd = strdup("./simpclient"); app->maxprocs = 2; - pmix_argv_append_nosize(&app->argv, "gumby"); + pmix_argv_append_nosize(&app->argv, "simpclient"); pmix_argv_append_nosize(&app->argv, "-n"); pmix_argv_append_nosize(&app->argv, "2"); pmix_setenv("PMIX_ENV_VALUE", "3", true, &app->env); - PMIX_INFO_CREATE(app->info, 2); - (void)strncpy(app->info[0].key, "DARTH", PMIX_MAX_KEYLEN); - app->info[0].value.type = PMIX_INT8; - app->info[0].value.data.int8 = 12; - (void)strncpy(app->info[1].key, "VADER", PMIX_MAX_KEYLEN); - app->info[1].value.type = PMIX_DOUBLE; - app->info[1].value.data.dval = 12.34; pmix_output(0, "Client ns %s rank %d: calling PMIx_Spawn", 
myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Spawn(NULL, 0, app, 1, nsp2))) { @@ -106,25 +100,18 @@ int main(int argc, char **argv) } PMIX_APP_FREE(app, 1); - /* check to see if we got the expected info back */ - if (0 != strncmp(nsp2, "DYNSPACE", PMIX_MAX_NSLEN)) { - pmix_output(0, "Client ns %s rank %d: PMIx_Spawn returned incorrect nspace: %s", myproc.nspace, myproc.rank, nsp2); - goto done; - } else { - pmix_output(0, "Client ns %s rank %d: PMIx_Spawn succeeded returning nspace: %s", myproc.nspace, myproc.rank, nsp2); - } - /* get their universe size */ + /* get their job size */ (void)strncpy(proc.nspace, nsp2, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; val = NULL; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val)) || + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val)) || NULL == val) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + pmix_output(0, "Client ns %s rank %d: PMIx_Get job %s size failed: %d", myproc.nspace, myproc.rank, nsp2, rc); goto done; } ntmp = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe %s size %d", myproc.nspace, myproc.rank, nsp2, (int)ntmp); + pmix_output(0, "Client %s:%d job %s size %d", myproc.nspace, myproc.rank, nsp2, (int)ntmp); } /* just cycle the connect/disconnect functions */ diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simpft.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simpft.c index 57a6bfc8c6b..a6acf5f89ca 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simpft.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simpft.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
* Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -83,16 +83,17 @@ int main(int argc, char **argv) } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); completed = false; /* register our errhandler */ diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simpjctrl.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simpjctrl.c index c9ac506520a..037f7eae383 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simpjctrl.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simpjctrl.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -133,14 +133,17 @@ int main(int argc, char **argv) (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - /* get our universe size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc); + /* get our job size */ + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %s\n", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs); + fprintf(stderr, "Client %s:%d job size %d\n", myproc.nspace, myproc.rank, nprocs); /* inform the RM that we are preemptible, and that our checkpoint methods are * "signal" on SIGUSR2 and event on PMIX_JCTRL_CHECKPOINT */ diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simplegacy.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simplegacy.c index 1b15366cd4a..98f40a15dd4 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simplegacy.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simplegacy.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -24,230 +24,90 @@ */ #include -#include +#include #include #include #include #include -#include "src/class/pmix_object.h" -#include "src/util/output.h" -#include "src/util/printf.h" - #define MAXCNT 3 -static volatile bool completed = false; -static pmix_proc_t myproc; - -static void notification_fn(size_t evhdlr_registration_id, - pmix_status_t status, - const pmix_proc_t *source, - pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, - void *cbdata) -{ - pmix_output(0, "Client %s:%d NOTIFIED with status %s", myproc.nspace, myproc.rank, PMIx_Error_string(status)); - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); - } - completed = true; -} - -static void errhandler_reg_callbk(pmix_status_t status, - size_t errhandler_ref, - void *cbdata) -{ - volatile bool *active = (volatile bool*)cbdata; - - pmix_output(0, "Client: ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%lu", - status, (unsigned long)errhandler_ref); - *active = false; -} - -/* this is an event notification function that we explicitly request - * be called when the PMIX_MODEL_DECLARED notification is issued. - * We could catch it in the general event notification function and test - * the status to see if the status matched, but it often is simpler - * to declare a use-specific notification callback point. 
In this case, - * we are asking to know whenever a model is declared as a means - * of testing server self-notification */ -static void model_callback(size_t evhdlr_registration_id, - pmix_status_t status, - const pmix_proc_t *source, - pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, - void *cbdata) -{ - size_t n; - - /* just let us know it was received */ - fprintf(stderr, "%s:%d Model event handler called with status %d(%s)\n", - myproc.nspace, myproc.rank, status, PMIx_Error_string(status)); - for (n=0; n < ninfo; n++) { - if (PMIX_STRING == info[n].value.type) { - fprintf(stderr, "%s:%d\t%s:\t%s\n", - myproc.nspace, myproc.rank, - info[n].key, info[n].value.data.string); - } - } - - /* we must NOT tell the event handler state machine that we - * are the last step as that will prevent it from notifying - * anyone else that might be listening for declarations */ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); - } -} - -/* event handler registration is done asynchronously */ -static void model_registration_callback(pmix_status_t status, - size_t evhandler_ref, - void *cbdata) -{ - volatile int *active = (volatile int*)cbdata; - - fprintf(stderr, "simpclient EVENT HANDLER REGISTRATION RETURN STATUS %d, ref=%lu\n", - status, (unsigned long)evhandler_ref); - *active = false; -} - int main(int argc, char **argv) { - int rc; - pmix_value_t value; - pmix_value_t *val = &value; + int rc, j, n; char *tmp; - pmix_proc_t proc; - uint32_t nprocs, n; - int cnt, j; - volatile bool active; - pmix_info_t info, *iptr; - size_t ninfo; - pmix_status_t code; - - /* init us and declare we are a test programming model */ - PMIX_INFO_CREATE(iptr, 2); - PMIX_INFO_LOAD(&iptr[0], PMIX_PROGRAMMING_MODEL, "TEST", PMIX_STRING); - PMIX_INFO_LOAD(&iptr[1], PMIX_MODEL_LIBRARY_NAME, "PMIX", PMIX_STRING); - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, iptr, 2))) { - pmix_output(0, "Client ns %s 
rank %d: PMIx_Init failed: %s", - myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + int spawned; + int rank; + int nprocs; + char value[1024]; + + fprintf(stderr, "Client calling init\n"); + if (PMI_SUCCESS != (rc = PMI_Init(&spawned))) { + fprintf(stderr, "Client PMI_Init failed: %d\n", rc); exit(rc); } - PMIX_INFO_FREE(iptr, 2); - pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); + fprintf(stderr, "Client Running\n"); /* test something */ - (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get failed: %s", - myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + if (PMI_SUCCESS != (rc = PMI_Get_rank(&rank))) { + fprintf(stderr, "Client PMI_Get_rank failed: %d\n", rc); exit(rc); } - nprocs = val->data .uint32; - PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); - - /* register a handler specifically for when models declare */ - active = true; - ninfo = 1; - PMIX_INFO_CREATE(iptr, ninfo); - PMIX_INFO_LOAD(&iptr[0], PMIX_EVENT_HDLR_NAME, "SIMPCLIENT-MODEL", PMIX_STRING); - code = PMIX_MODEL_DECLARED; - PMIx_Register_event_handler(&code, 1, iptr, ninfo, - model_callback, model_registration_callback, (void*)&active); - while (active) { - usleep(10); - } - PMIX_INFO_FREE(iptr, ninfo); - - /* register our errhandler */ - active = true; - PMIx_Register_event_handler(NULL, 0, NULL, 0, - notification_fn, errhandler_reg_callbk, (void*)&active); - while (active) { - usleep(10); + if (PMI_SUCCESS != (rc = PMI_Get_universe_size(&nprocs))) { + fprintf(stderr, "Client %d: PMI_Get_universe_size failed: %d\n", rank, rc); + exit(rc); } + fprintf(stderr, "Client %d job size %d\n", rank, nprocs); - memset(&info, 0, sizeof(pmix_info_t)); - (void)strncpy(info.key, PMIX_COLLECT_DATA, PMIX_MAX_KEYLEN); - info.value.type = PMIX_UNDEF; 
- info.value.data.flag = 1; - - for (cnt=0; cnt < MAXCNT; cnt++) { - pmix_output(0, "EXECUTING LOOP %d", cnt); - for (j=0; j < 10; j++) { - (void)asprintf(&tmp, "%s-%d-gasnet-%d-%d", myproc.nspace, myproc.rank, cnt, j); - value.type = PMIX_UINT64; - value.data.uint64 = 1234; - if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_GLOBAL, tmp, &value))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Put failed: %s", - myproc.nspace, myproc.rank, PMIx_Error_string(rc)); - goto done; - } - free(tmp); - } - - if (PMIX_SUCCESS != (rc = PMIx_Commit())) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Commit failed: %s", - myproc.nspace, myproc.rank, cnt, PMIx_Error_string(rc)); + for (j=0; j < 10; j++) { + (void)asprintf(&tmp, "%d-gasnet-0-%d", rank, j); + if (PMI_SUCCESS != (rc = PMI_KVS_Put("foobar", tmp, "myvalue"))) { + fprintf(stderr, "Client %d: j %d PMI_KVS_Put failed: %d\n", + rank, j, rc); goto done; } + free(tmp); + } - /* call fence to ensure the data is received */ - if (PMIX_SUCCESS != (rc = PMIx_Fence(NULL, 0, &info, 1))) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Fence failed: %s", - myproc.nspace, myproc.rank, cnt, PMIx_Error_string(rc)); + if (PMIX_SUCCESS != (rc = PMI_KVS_Commit("foobar"))) { + fprintf(stderr, "Client %d: PMI_KVS_Commit failed: %d\n", rank, rc); goto done; - } + } - /* check the returned data */ - (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_UNDEF; - for (j=0; j < 10; j++) { - for (n=0; n < nprocs; n++) { - (void)asprintf(&tmp, "%s-%d-gasnet-%d-%d", myproc.nspace, n, cnt, j); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s failed: %s", - myproc.nspace, myproc.rank, j, tmp, PMIx_Error_string(rc)); - continue; - } - if (NULL == val) { - pmix_output(0, "Client ns %s rank %d: NULL value returned", - myproc.nspace, myproc.rank); - break; - } - if (PMIX_UINT64 != val->type) { - pmix_output(0, "Client ns %s rank %d cnt %d: 
PMIx_Get %s returned wrong type: %d", myproc.nspace, myproc.rank, j, tmp, val->type); - PMIX_VALUE_RELEASE(val); - free(tmp); - continue; - } - if (1234 != val->data.uint64) { - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned wrong value: %d", myproc.nspace, myproc.rank, j, tmp, (int)val->data.uint64); - PMIX_VALUE_RELEASE(val); - free(tmp); - continue; - } - pmix_output(0, "Client ns %s rank %d cnt %d: PMIx_Get %s returned correct", myproc.nspace, myproc.rank, j, tmp); - PMIX_VALUE_RELEASE(val); - free(tmp); + fprintf(stderr, "Client rank %d: CALLING PMI_Barrier\n", rank); + + /* call fence to ensure the data is received */ + if (PMI_SUCCESS != (rc = PMI_Barrier())) { + fprintf(stderr, "Client %d: PMI_Barrier failed: %d\n", rank, rc); + goto done; + } + + /* check the returned data */ + for (j=0; j < 10; j++) { + for (n=0; n < nprocs; n++) { + (void)asprintf(&tmp, "%d-gasnet-0-%d", n, j); + fprintf(stderr, "Client %d: Calling get\n", rank); + if (PMI_SUCCESS != (rc = PMI_KVS_Get("foobar", tmp, value, 1024))) { + fprintf(stderr, "Client %d: PMI_Get failed: %d\n", rank, rc); + continue; } + if (0 == strcmp(value, "myvalue")) { + fprintf(stderr, "Client %d: PMI_Get returned correct value\n", rank); + } else { + fprintf(stderr, "Client %d: PMI_Get returned incorrect value\n", rank); + } + free(tmp); } } done: /* finalize us */ - pmix_output(0, "Client ns %s rank %d: Finalizing", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %s\n", - myproc.nspace, myproc.rank, PMIx_Error_string(rc)); + fprintf(stderr, "Client rank %d: Finalizing\n", rank); + if (PMI_SUCCESS != (rc = PMI_Finalize())) { + fprintf(stderr, "Client rank %d: finalize failed %d\n", rank, rc); } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank); + fprintf(stderr, "Client %d:PMI_Finalize successfully completed\n", rank); } 
fflush(stderr); return(rc); diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simppub.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simppub.c index 12d6c68735e..2ccf9b258f4 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simppub.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simppub.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * $COPYRIGHT$ * @@ -54,16 +54,17 @@ int main(int argc, char **argv) } pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", + myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); /* call fence to ensure the data is received */ PMIX_PROC_CONSTRUCT(&proc); diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simptest.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simptest.c index e31f4211b9f..b78165f704a 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simptest.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simptest.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. 
All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. @@ -38,7 +38,6 @@ #include #include #include -#include PMIX_EVENT_HEADER #if PMIX_HAVE_HWLOC #include @@ -230,8 +229,6 @@ static void dlcbfunc(int sd, short flags, void *cbdata) { myxfer_t *x = (myxfer_t*)cbdata; - pmix_output(0, "INVENTORY READY FOR DELIVERY"); - PMIx_server_deliver_inventory(x->info, x->ninfo, NULL, 0, opcbfunc, (void*)x); } @@ -245,8 +242,6 @@ static void infocbfunc(pmix_status_t status, myxfer_t *x; size_t n; - pmix_output(0, "INVENTORY RECEIVED"); - /* we don't have any place to send this, so for test * purposes only, let's push it back down for processing. * Note: it must be thread-shifted first as we are in @@ -343,12 +338,14 @@ int main(int argc, char **argv) pmix_info_t *info; size_t ninfo; bool cross_version = false; + bool usock = true; bool hwloc = false; #if PMIX_HAVE_HWLOC char *hwloc_file = NULL; #endif mylock_t mylock; pmix_status_t code; + sigset_t unblock; /* smoke test */ if (PMIX_SUCCESS != 0) { @@ -378,6 +375,25 @@ int main(int argc, char **argv) /* cross-version test - we will set one child to * run at a different version. 
Requires -n >= 2 */ cross_version = true; + usock = false; + } else if (0 == strcmp("-u", argv[n])) { + /* enable usock */ + usock = false; +#if PMIX_HAVE_HWLOC + } else if (0 == strcmp("-hwloc", argv[n]) || + 0 == strcmp("--hwloc", argv[n])) { + /* test hwloc support */ + hwloc = true; + } else if (0 == strcmp("-hwloc-file", argv[n]) || + 0 == strcmp("--hwloc-file", argv[n])) { + if (NULL == argv[n+1]) { + fprintf(stderr, "The --hwloc-file option requires an argument\n"); + exit(1); + } + hwloc_file = strdup(argv[n+1]); + hwloc = true; + ++n; +#endif } else if (0 == strcmp("-h", argv[n])) { /* print the options and exit */ fprintf(stderr, "usage: simptest \n"); @@ -393,6 +409,12 @@ int main(int argc, char **argv) if (NULL == executable) { executable = strdup("./simpclient"); } + /* check for executable existence and permissions */ + if (0 != access(executable, X_OK)) { + fprintf(stderr, "Executable %s not found or missing executable permissions\n", executable); + exit(1); + } + if (cross_version && nprocs < 2) { fprintf(stderr, "Cross-version testing requires at least two clients\n"); exit(1); @@ -407,6 +429,21 @@ int main(int argc, char **argv) fprintf(stderr, "Testing version %s\n", PMIx_Get_version()); + /* ensure that SIGCHLD is unblocked as we need to capture it */ + if (0 != sigemptyset(&unblock)) { + fprintf(stderr, "SIGEMPTYSET FAILED\n"); + exit(1); + } + if (0 != sigaddset(&unblock, SIGCHLD)) { + fprintf(stderr, "SIGADDSET FAILED\n"); + exit(1); + } + if (0 != sigprocmask(SIG_UNBLOCK, &unblock, NULL)) { + fprintf(stderr, "SIG_UNBLOCK FAILED\n"); + exit(1); + } + + /* setup the server library and tell it to support tool connections */ #if PMIX_HAVE_HWLOC if (hwloc) { @@ -477,9 +514,9 @@ int main(int argc, char **argv) /* setup to see sigchld on the forked tests */ PMIX_CONSTRUCT(&children, pmix_list_t); - event_assign(&handler, pmix_globals.evbase, SIGCHLD, - EV_SIGNAL|EV_PERSIST,wait_signal_callback, &handler); - event_add(&handler, NULL); + 
pmix_event_assign(&handler, pmix_globals.evbase, SIGCHLD, + EV_SIGNAL|EV_PERSIST,wait_signal_callback, &handler); + pmix_event_add(&handler, NULL); /* we have a single namespace for all clients */ atmp = NULL; @@ -539,6 +576,9 @@ int main(int argc, char **argv) } else { pmix_setenv("PMIX_MCA_ptl", "usock", true, &client_env); } + } else if (!usock) { + /* don't disable usock => enable it on client */ + pmix_setenv("PMIX_MCA_ptl", "usock", true, &client_env); } x = PMIX_NEW(myxfer_t); if (PMIX_SUCCESS != (rc = PMIx_server_register_client(&proc, myuid, mygid, @@ -616,6 +656,24 @@ int main(int argc, char **argv) DEBUG_DESTRUCT_LOCK(&globallock); PMIX_INFO_FREE(info, ninfo); +#if 0 + fprintf(stderr, "TEST NONDEFAULT NOTIFICATION\n"); + /* verify that notifications don't recirculate */ + ninfo = 1; + PMIX_INFO_CREATE(info, ninfo); + /* mark that it is not to go to any default handlers */ + PMIX_INFO_LOAD(&info[0], PMIX_EVENT_NON_DEFAULT, NULL, PMIX_BOOL); + PMIx_Notify_event(PMIX_ERR_DEBUGGER_RELEASE, + &pmix_globals.myid, PMIX_RANGE_LOCAL, + info, ninfo, NULL, NULL); + PMIX_INFO_FREE(info, ninfo); + /* wait a little in case we get notified */ + for (ninfo=0; ninfo < 100000; ninfo++) { + struct timespec t = {0, 100}; + nanosleep(&t, NULL); + } +#endif + done: /* deregister the event handlers */ PMIx_Deregister_event_handler(0, NULL, NULL); @@ -645,42 +703,132 @@ static void set_namespace(int nprocs, char *ranks, char *nspace, pmix_op_cbfunc_t cbfunc, myxfer_t *x) { char *regex, *ppn; - char hostname[PMIX_MAXHOSTNAMELEN]; + int n, m, k; + pmix_info_t *info; + pmix_data_array_t *array; - gethostname(hostname, sizeof(hostname)); - x->ninfo = 7; + x->ninfo = 16 + nprocs; PMIX_INFO_CREATE(x->info, x->ninfo); - (void)strncpy(x->info[0].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); - x->info[0].value.type = PMIX_UINT32; - x->info[0].value.data.uint32 = nprocs; - - (void)strncpy(x->info[1].key, PMIX_SPAWNED, PMIX_MAX_KEYLEN); - x->info[1].value.type = PMIX_UINT32; - 
x->info[1].value.data.uint32 = 0; - - (void)strncpy(x->info[2].key, PMIX_LOCAL_SIZE, PMIX_MAX_KEYLEN); - x->info[2].value.type = PMIX_UINT32; - x->info[2].value.data.uint32 = nprocs; - - (void)strncpy(x->info[3].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN); - x->info[3].value.type = PMIX_STRING; - x->info[3].value.data.string = strdup(ranks); - - PMIx_generate_regex(hostname, &regex); - (void)strncpy(x->info[4].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); - x->info[4].value.type = PMIX_STRING; - x->info[4].value.data.string = regex; - - PMIx_generate_ppn(ranks, &ppn); - (void)strncpy(x->info[5].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); - x->info[5].value.type = PMIX_STRING; - x->info[5].value.data.string = ppn; - - (void)strncpy(x->info[6].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN); - x->info[6].value.type = PMIX_UINT32; - x->info[6].value.data.uint32 = nprocs; - + n = 0; + + PMIx_generate_regex("test000,test001,test002", &regex); + PMIx_generate_ppn("0;1;2", &ppn); + + (void)strncpy(x->info[n].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = regex; + ++n; + + /* if we have some empty nodes, then fill their spots */ + (void)strncpy(x->info[n].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = ppn; + ++n; + + (void)strncpy(x->info[n].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_SPAWNED, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 0; + ++n; + + (void)strncpy(x->info[n].key, PMIX_LOCAL_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = strdup(ranks); + ++n; + + (void)strncpy(x->info[n].key, PMIX_JOB_SIZE,
PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_JOBID, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = strdup("1234"); + ++n; + + (void)strncpy(x->info[n].key, PMIX_NPROC_OFFSET, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 0; + ++n; + + (void)strncpy(x->info[n].key, PMIX_NODEID, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 0; + ++n; + + (void)strncpy(x->info[n].key, PMIX_NODE_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_NUM_NODES, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 1; + ++n; + + (void)strncpy(x->info[n].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_MAX_PROCS, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_JOB_NUM_APPS, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 1; + ++n; + + (void)strncpy(x->info[n].key, PMIX_LOCALLDR, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_PROC_RANK; + x->info[n].value.data.uint32 = 0; + ++n; + + /* add the proc-specific data */ + for (m=0; m < nprocs; m++) { + (void)strncpy(x->info[n].key, PMIX_PROC_DATA, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_DATA_ARRAY; + PMIX_DATA_ARRAY_CREATE(array, 5, PMIX_INFO); + x->info[n].value.data.darray = array; + info = (pmix_info_t*)array->array; + k = 0; + (void)strncpy(info[k].key, PMIX_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_PROC_RANK; + info[k].value.data.rank = m; + ++k; + (void)strncpy(info[k].key, PMIX_GLOBAL_RANK, PMIX_MAX_KEYLEN); + 
info[k].value.type = PMIX_PROC_RANK; + info[k].value.data.rank = m; + ++k; + (void)strncpy(info[k].key, PMIX_LOCAL_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_UINT16; + info[k].value.data.uint16 = m; + ++k; + + (void)strncpy(info[k].key, PMIX_NODE_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_UINT16; + info[k].value.data.uint16 = m; + ++k; + + (void)strncpy(info[k].key, PMIX_NODEID, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_UINT32; + info[k].value.data.uint32 = 0; + ++k; + /* move to next proc */ + ++n; + } PMIx_server_register_nspace(nspace, nprocs, x->info, x->ninfo, cbfunc, x); } @@ -694,6 +842,12 @@ static void errhandler(size_t evhdlr_registration_id, void *cbdata) { pmix_output(0, "SERVER: ERRHANDLER CALLED WITH STATUS %d", status); + /* we must NOT tell the event handler state machine that we + * are the last step as that will prevent it from notifying + * anyone else that might be listening for declarations */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); + } } static void errhandler_reg_callbk (pmix_status_t status, @@ -702,8 +856,6 @@ static void errhandler_reg_callbk (pmix_status_t status, { mylock_t *lock = (mylock_t*)cbdata; - pmix_output(0, "SERVER: ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%lu", - status, (unsigned long)errhandler_ref); lock->status = status; DEBUG_WAKEUP_THREAD(lock); } @@ -716,8 +868,6 @@ static pmix_status_t connected(const pmix_proc_t *proc, void *server_object, static pmix_status_t finalized(const pmix_proc_t *proc, void *server_object, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: FINALIZED %s:%d WAKEUP %d", - proc->nspace, proc->rank, wakeup); return PMIX_OPERATION_SUCCEEDED; } @@ -793,7 +943,6 @@ static pmix_status_t fencenb_fn(const pmix_proc_t procs[], size_t nprocs, { pmix_shift_caddy_t *scd; - pmix_output(0, "SERVER: FENCENB"); scd = PMIX_NEW(pmix_shift_caddy_t); scd->status = PMIX_SUCCESS; scd->data = data; @@ -811,8 +960,6 @@ static 
pmix_status_t dmodex_fn(const pmix_proc_t *proc, { pmix_shift_caddy_t *scd; - pmix_output(0, "SERVER: DMODEX"); - /* if this is a timeout test, then do nothing */ if (istimeouttest) { return PMIX_SUCCESS; @@ -835,8 +982,6 @@ static pmix_status_t publish_fn(const pmix_proc_t *proc, pmix_locdat_t *p; size_t n; - pmix_output(0, "SERVER: PUBLISH"); - for (n=0; n < ninfo; n++) { p = PMIX_NEW(pmix_locdat_t); (void)strncpy(p->pdata.proc.nspace, proc->nspace, PMIX_MAX_NSLEN); @@ -877,8 +1022,6 @@ static pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys, pmix_status_t ret = PMIX_ERR_NOT_FOUND; lkobj_t *lk; - pmix_output(0, "SERVER: LOOKUP"); - PMIX_CONSTRUCT(&results, pmix_list_t); for (n=0; NULL != keys[n]; n++) { @@ -928,8 +1071,6 @@ static pmix_status_t unpublish_fn(const pmix_proc_t *proc, char **keys, pmix_locdat_t *p, *p2; size_t n; - pmix_output(0, "SERVER: UNPUBLISH"); - for (n=0; NULL != keys[n]; n++) { PMIX_LIST_FOREACH_SAFE(p, p2, &pubdata, pmix_locdat_t) { if (0 == strncmp(keys[n], p->pdata.key, PMIX_MAX_KEYLEN)) { @@ -961,8 +1102,6 @@ static pmix_status_t spawn_fn(const pmix_proc_t *proc, pmix_proc_t *pptr; bool spawned; - pmix_output(0, "SERVER: SPAWN"); - /* check the job info for parent and spawned keys */ for (n=0; n < ninfo; n++) { if (0 == strncmp(job_info[n].key, PMIX_PARENT_ID, PMIX_MAX_KEYLEN)) { @@ -996,8 +1135,6 @@ static pmix_status_t connect_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: CONNECT"); - /* in practice, we would pass this request to the local * resource manager for handling */ @@ -1011,8 +1148,6 @@ static pmix_status_t disconnect_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: DISCONNECT"); - return PMIX_OPERATION_SUCCEEDED; } @@ -1035,7 +1170,6 @@ static pmix_status_t notify_event(pmix_status_t code, pmix_info_t 
info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) { - pmix_output(0, "SERVER: NOTIFY EVENT"); return PMIX_OPERATION_SUCCEEDED; } @@ -1064,8 +1198,6 @@ static pmix_status_t query_fn(pmix_proc_t *proct, pmix_info_t *info; query_data_t qd; - pmix_output(0, "SERVER: QUERY"); - if (NULL == cbfunc) { return PMIX_ERROR; } @@ -1093,8 +1225,6 @@ static void tool_connect_fn(pmix_info_t *info, size_t ninfo, { pmix_proc_t proc; - pmix_output(0, "SERVER: TOOL CONNECT"); - /* just pass back an arbitrary nspace */ (void)strncpy(proc.nspace, "TOOL", PMIX_MAX_NSLEN); proc.rank = 0; @@ -1122,8 +1252,6 @@ static void log_fn(const pmix_proc_t *client, { mylog_t *lg = (mylog_t *)malloc(sizeof(mylog_t)); - pmix_output(0, "SERVER: LOG"); - lg->cbfunc = cbfunc; lg->cbdata = cbdata; PMIX_THREADSHIFT(lg, foobar); @@ -1161,7 +1289,7 @@ static void wait_signal_callback(int fd, short event, void *arg) pid_t pid; wait_tracker_t *t2; - if (SIGCHLD != event_get_signal(sig)) { + if (SIGCHLD != pmix_event_get_signal(sig)) { return; } diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/simptimeout.c b/opal/mca/pmix/pmix3x/pmix/test/simple/simptimeout.c index f5454029887..10835d68abf 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/simptimeout.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/simptimeout.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* $COPYRIGHT$ * @@ -103,17 +103,17 @@ int main(int argc, char **argv) usleep(10); } - /* get our universe size */ + /* get our job size */ (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %s", + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { + pmix_output(0, "Client ns %s rank %d: PMIx_Get job size failed: %s", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } nprocs = val->data.uint32; PMIX_VALUE_RELEASE(val); - pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs); + pmix_output(0, "Client %s:%d job size %d", myproc.nspace, myproc.rank, nprocs); /* if we are rank=0, then do a fence with timeout */ if (0 == myproc.rank) { diff --git a/opal/mca/pmix/pmix3x/pmix/test/simple/stability.c b/opal/mca/pmix/pmix3x/pmix/test/simple/stability.c index d67b6f84109..c97df8c74fe 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/simple/stability.c +++ b/opal/mca/pmix/pmix3x/pmix/test/simple/stability.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. 
@@ -183,6 +183,8 @@ static pmix_list_t pubdata; static pmix_event_t handler; static pmix_list_t children; static bool istimeouttest = false; +static bool nettest = false; +static bool arrays = false; static void set_namespace(int nprocs, char *ranks, char *nspace, pmix_op_cbfunc_t cbfunc, myxfer_t *x); @@ -209,6 +211,32 @@ static void opcbfunc(pmix_status_t status, void *cbdata) DEBUG_WAKEUP_THREAD(&x->lock); } +static void setup_cbfunc(pmix_status_t status, + pmix_info_t info[], size_t ninfo, + void *provided_cbdata, + pmix_op_cbfunc_t cbfunc, void *cbdata) +{ + myxfer_t *x = (myxfer_t*)provided_cbdata; + size_t n; + + /* transfer it to the caddy for return to the main thread */ + if (0 < ninfo) { + PMIX_INFO_CREATE(x->info, ninfo); + x->ninfo = ninfo; + for (n=0; n < ninfo; n++) { + PMIX_INFO_XFER(&x->info[n], &info[n]); + } + } + + /* let the library release the data and cleanup from + * the operation */ + if (NULL != cbfunc) { + cbfunc(PMIX_SUCCESS, cbdata); + } + + DEBUG_WAKEUP_THREAD(&x->lock); +} + int main(int argc, char **argv) { char **client_env=NULL; @@ -225,6 +253,11 @@ int main(int argc, char **argv) size_t ninfo; mylock_t mylock; int ncycles=1, m, delay=0; + bool hwloc = false; +#if PMIX_HAVE_HWLOC + char *hwloc_file = NULL; +#endif + sigset_t unblock; /* smoke test */ if (PMIX_SUCCESS != 0) { @@ -260,30 +293,115 @@ int main(int argc, char **argv) 0 == strcmp("--sleep", argv[n])) && NULL != argv[n+1]) { delay = strtol(argv[n+1], NULL, 10); +#if PMIX_HAVE_HWLOC + } else if (0 == strcmp("-hwloc", argv[n]) || + 0 == strcmp("--hwloc", argv[n])) { + /* test hwloc support */ + hwloc = true; + } else if (0 == strcmp("-hwloc-file", argv[n]) || + 0 == strcmp("--hwloc-file", argv[n])) { + if (NULL == argv[n+1]) { + fprintf(stderr, "The --hwloc-file option requires an argument\n"); + exit(1); + } + hwloc_file = strdup(argv[n+1]); + hwloc = true; + ++n; +#endif } else if (0 == strcmp("-h", argv[n])) { /* print the options and exit */ fprintf(stderr, "usage: 
simptest \n"); fprintf(stderr, " -n N Number of clients to run\n"); fprintf(stderr, " -e foo Name of the client executable to run (default: simpclient\n"); fprintf(stderr, " -reps N Cycle for N repetitions"); + fprintf(stderr, " -hwloc Test hwloc support\n"); + fprintf(stderr, " -hwloc-file FILE Use file to import topology\n"); + fprintf(stderr, " -net-test Test network endpt assignments\n"); + fprintf(stderr, " -arrays Use the job session array to pass registration info\n"); exit(0); + } else if (0 == strcmp("-net-test", argv[n]) || + 0 == strcmp("--net-test", argv[n])) { + /* test network support */ + nettest = true; + } else if (0 == strcmp("-arrays", argv[n]) || + 0 == strcmp("--arrays", argv[n])) { + /* test network support */ + arrays = true; } } if (NULL == executable) { - executable = strdup("./quietclient"); + if (nettest) { + executable = strdup("./simpcoord"); + } else { + executable = strdup("./quietclient"); + } + } + /* check for executable existence and permissions */ + if (0 != access(executable, X_OK)) { + fprintf(stderr, "Executable %s not found or missing executable permissions\n", executable); + exit(1); + } + + /* ensure that SIGCHLD is unblocked as we need to capture it */ + if (0 != sigemptyset(&unblock)) { + fprintf(stderr, "SIGEMPTYSET FAILED\n"); + exit(1); + } + if (0 != sigaddset(&unblock, SIGCHLD)) { + fprintf(stderr, "SIGADDSET FAILED\n"); + exit(1); + } + if (0 != sigprocmask(SIG_UNBLOCK, &unblock, NULL)) { + fprintf(stderr, "SIG_UNBLOCK FAILED\n"); + exit(1); } + /* setup the server library and tell it to support tool connections */ +#if PMIX_HAVE_HWLOC + if (hwloc) { +#if HWLOC_API_VERSION < 0x20000 + ninfo = 4; +#else + ninfo = 5; +#endif + } else { + ninfo = 4; + } +#else ninfo = 3; +#endif PMIX_INFO_CREATE(info, ninfo); PMIX_INFO_LOAD(&info[0], PMIX_SERVER_TOOL_SUPPORT, NULL, PMIX_BOOL); PMIX_INFO_LOAD(&info[1], PMIX_USOCK_DISABLE, NULL, PMIX_BOOL); PMIX_INFO_LOAD(&info[2], PMIX_SERVER_GATEWAY, NULL, PMIX_BOOL); +#if 
PMIX_HAVE_HWLOC + if (hwloc) { + if (NULL != hwloc_file) { + PMIX_INFO_LOAD(&info[3], PMIX_TOPOLOGY_FILE, hwloc_file, PMIX_STRING); + } else { + PMIX_INFO_LOAD(&info[3], PMIX_TOPOLOGY, NULL, PMIX_STRING); + } +#if HWLOC_API_VERSION >= 0x20000 + PMIX_INFO_LOAD(&info[4], PMIX_HWLOC_SHARE_TOPO, NULL, PMIX_BOOL); +#endif + } +#endif + if (nettest) { + /* set a known network configuration for the pnet/test component */ + putenv("PMIX_MCA_pnet_test_nverts=nodes:5;plane:d:3;plane:s:2;plane:d:5"); + putenv("PMIX_MCA_pnet=test"); + } + if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, info, ninfo))) { fprintf(stderr, "Init failed with error %d\n", rc); return rc; } PMIX_INFO_FREE(info, ninfo); + if (nettest) { + unsetenv("PMIX_MCA_pnet"); + unsetenv("PMIX_MCA_pnet_test_nverts"); + } /* register the default errhandler */ DEBUG_CONSTRUCT_LOCK(&mylock); @@ -304,9 +422,9 @@ int main(int argc, char **argv) /* setup to see sigchld on the forked tests */ PMIX_CONSTRUCT(&children, pmix_list_t); - event_assign(&handler, pmix_globals.evbase, SIGCHLD, - EV_SIGNAL|EV_PERSIST,wait_signal_callback, &handler); - event_add(&handler, NULL); + pmix_event_assign(&handler, pmix_globals.evbase, SIGCHLD, + EV_SIGNAL|EV_PERSIST,wait_signal_callback, &handler); + pmix_event_add(&handler, NULL); for (m=0; m < ncycles; m++) { fprintf(stderr, "Running cycle %d\n", m); @@ -445,43 +563,186 @@ int main(int argc, char **argv) static void set_namespace(int nprocs, char *ranks, char *nspace, pmix_op_cbfunc_t cbfunc, myxfer_t *x) { - char *regex, *ppn; - char hostname[PMIX_MAXHOSTNAMELEN]; + char *regex, *ppn, *rks; + int n, m, k; + pmix_data_array_t *array; + pmix_info_t *info, *iptr, *ip; + myxfer_t cd, lock; + pmix_status_t rc; - gethostname(hostname, sizeof(hostname)); - x->ninfo = 7; + if (arrays) { + x->ninfo = 15 + nprocs; + } else { + x->ninfo = 16 + nprocs; + } PMIX_INFO_CREATE(x->info, x->ninfo); - (void)strncpy(x->info[0].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); - x->info[0].value.type = 
PMIX_UINT32; - x->info[0].value.data.uint32 = nprocs; - - (void)strncpy(x->info[1].key, PMIX_SPAWNED, PMIX_MAX_KEYLEN); - x->info[1].value.type = PMIX_UINT32; - x->info[1].value.data.uint32 = 0; - - (void)strncpy(x->info[2].key, PMIX_LOCAL_SIZE, PMIX_MAX_KEYLEN); - x->info[2].value.type = PMIX_UINT32; - x->info[2].value.data.uint32 = nprocs; - - (void)strncpy(x->info[3].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN); - x->info[3].value.type = PMIX_STRING; - x->info[3].value.data.string = strdup(ranks); - - PMIx_generate_regex(hostname, &regex); - (void)strncpy(x->info[4].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); - x->info[4].value.type = PMIX_STRING; - x->info[4].value.data.string = regex; - - PMIx_generate_ppn(ranks, &ppn); - (void)strncpy(x->info[5].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); - x->info[5].value.type = PMIX_STRING; - x->info[5].value.data.string = ppn; + n = 0; + + PMIx_generate_regex("test000,test001,test002", &regex); + PMIx_generate_ppn("0;1;2", &ppn); + + if (arrays) { + (void)strncpy(x->info[n].key, PMIX_JOB_INFO_ARRAY, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_DATA_ARRAY; + PMIX_DATA_ARRAY_CREATE(x->info[n].value.data.darray, 2, PMIX_INFO); + iptr = (pmix_info_t*)x->info[n].value.data.darray->array; + (void)strncpy(iptr[0].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); + iptr[0].value.type = PMIX_STRING; + iptr[0].value.data.string = regex; + (void)strncpy(iptr[1].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); + iptr[1].value.type = PMIX_STRING; + iptr[1].value.data.string = ppn; + ++n; + } else { + (void)strncpy(x->info[n].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = regex; + ++n; + + /* if we have some empty nodes, then fill their spots */ + (void)strncpy(x->info[n].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = ppn; + ++n; + } - (void)strncpy(x->info[6].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN); - x->info[6].value.type = PMIX_UINT32; -
x->info[6].value.data.uint32 = nprocs; + /* we have the required info to run setup_app, so do that now */ + PMIX_INFO_CREATE(iptr, 4); + PMIX_INFO_XFER(&iptr[0], &x->info[0]); + PMIX_INFO_XFER(&iptr[1], &x->info[1]); + PMIX_INFO_LOAD(&iptr[2], PMIX_SETUP_APP_ENVARS, NULL, PMIX_BOOL); + PMIX_LOAD_KEY(iptr[3].key, PMIX_ALLOC_NETWORK); + iptr[3].value.type = PMIX_DATA_ARRAY; + PMIX_DATA_ARRAY_CREATE(iptr[3].value.data.darray, 2, PMIX_INFO); + ip = (pmix_info_t*)iptr[3].value.data.darray->array; + asprintf(&rks, "%s.net", nspace); + PMIX_INFO_LOAD(&ip[0], PMIX_ALLOC_NETWORK_ID, rks, PMIX_STRING); + free(rks); + PMIX_INFO_LOAD(&ip[1], PMIX_ALLOC_NETWORK_SEC_KEY, NULL, PMIX_BOOL); + PMIX_CONSTRUCT(&cd, myxfer_t); + if (PMIX_SUCCESS != (rc = PMIx_server_setup_application(nspace, iptr, 4, + setup_cbfunc, &cd))) { + pmix_output(0, "[%s:%d] PMIx_server_setup_application failed: %s", __FILE__, __LINE__, PMIx_Error_string(rc)); + DEBUG_DESTRUCT_LOCK(&cd.lock); + } else { + DEBUG_WAIT_THREAD(&cd.lock); + } + /* use the results to setup the local subsystems */ + PMIX_CONSTRUCT(&lock, myxfer_t); + if (PMIX_SUCCESS != (rc = PMIx_server_setup_local_support(nspace, cd.info, cd.ninfo, + opcbfunc, &lock))) { + pmix_output(0, "[%s:%d] PMIx_server_setup_local_support failed: %s", __FILE__, __LINE__, PMIx_Error_string(rc)); + } else { + DEBUG_WAIT_THREAD(&lock.lock); + } + PMIX_DESTRUCT(&lock); + PMIX_DESTRUCT(&cd); + + (void)strncpy(x->info[n].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_SPAWNED, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 0; + ++n; + + (void)strncpy(x->info[n].key, PMIX_LOCAL_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + 
x->info[n].value.data.string = strdup(ranks); + ++n; + + (void)strncpy(x->info[n].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_JOBID, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_STRING; + x->info[n].value.data.string = strdup("1234"); + ++n; + + (void)strncpy(x->info[n].key, PMIX_NPROC_OFFSET, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 0; + ++n; + + (void)strncpy(x->info[n].key, PMIX_NODEID, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 0; + ++n; + + (void)strncpy(x->info[n].key, PMIX_NODE_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_NUM_NODES, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 1; + ++n; + + (void)strncpy(x->info[n].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_MAX_PROCS, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = nprocs; + ++n; + + (void)strncpy(x->info[n].key, PMIX_JOB_NUM_APPS, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_UINT32; + x->info[n].value.data.uint32 = 1; + ++n; + + (void)strncpy(x->info[n].key, PMIX_LOCALLDR, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_PROC_RANK; + x->info[n].value.data.uint32 = 0; + ++n; + + /* add the proc-specific data */ + for (m=0; m < nprocs; m++) { + (void)strncpy(x->info[n].key, PMIX_PROC_DATA, PMIX_MAX_KEYLEN); + x->info[n].value.type = PMIX_DATA_ARRAY; + PMIX_DATA_ARRAY_CREATE(array, 5, PMIX_INFO); + x->info[n].value.data.darray = array; + info = (pmix_info_t*)array->array; + k = 0; + (void)strncpy(info[k].key, PMIX_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_PROC_RANK; + 
info[k].value.data.rank = m; + ++k; + (void)strncpy(info[k].key, PMIX_GLOBAL_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_PROC_RANK; + info[k].value.data.rank = m; + ++k; + (void)strncpy(info[k].key, PMIX_LOCAL_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_UINT16; + info[k].value.data.uint16 = m; + ++k; + + (void)strncpy(info[k].key, PMIX_NODE_RANK, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_UINT16; + info[k].value.data.uint16 = m; + ++k; + + (void)strncpy(info[k].key, PMIX_NODEID, PMIX_MAX_KEYLEN); + info[k].value.type = PMIX_UINT32; + info[k].value.data.uint32 = 0; + ++k; + /* move to next proc */ + ++n; + } PMIx_server_register_nspace(nspace, nprocs, x->info, x->ninfo, cbfunc, x); } @@ -852,7 +1113,7 @@ static void wait_signal_callback(int fd, short event, void *arg) pid_t pid; wait_tracker_t *t2; - if (SIGCHLD != event_get_signal(sig)) { + if (SIGCHLD != pmix_event_get_signal(sig)) { return; } diff --git a/opal/mca/pmix/pmix3x/pmix/test/test_common.h b/opal/mca/pmix/pmix3x/pmix/test/test_common.h index 10b180e6598..490f68323be 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/test_common.h +++ b/opal/mca/pmix/pmix3x/pmix/test/test_common.h @@ -289,9 +289,9 @@ typedef struct { } \ if (PMIX_SUCCESS == rc) { \ if( PMIX_SUCCESS != cbdata.status ){ \ - if( !( (cbdata.status == PMIX_ERR_NOT_FOUND || cbdata.status == PMIX_ERR_PROC_ENTRY_NOT_FOUND) && ok_notfnd ) ){ \ - TEST_ERROR(("%s:%d: PMIx_Get_nb failed: %d from %s:%d, key=%s", \ - my_nspace, my_rank, rc, my_nspace, r)); \ + if( !( (cbdata.status == PMIX_ERR_NOT_FOUND || cbdata.status == PMIX_ERR_PROC_ENTRY_NOT_FOUND) && ok_notfnd ) ){ \ + TEST_ERROR(("%s:%d: PMIx_Get_nb failed: %d from %s:%d, key=%s", \ + my_nspace, my_rank, rc, my_nspace, r, key)); \ } \ rc = PMIX_ERROR; \ } else if (NULL == val) { \ diff --git a/opal/mca/pmix/pmix3x/pmix/test/test_error.c b/opal/mca/pmix/pmix3x/pmix/test/test_error.c index f5217f0657c..24a63da4917 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/test_error.c +++ 
b/opal/mca/pmix/pmix3x/pmix/test/test_error.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,7 +38,7 @@ static void timeout_errhandler(size_t evhdlr_registration_id, void *cbdata) { TEST_ERROR(("timeout errhandler called for error status = %d ninfo = %d", - status, ninfo)); + status, (int)ninfo)); if (NULL != cbfunc) { cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); } @@ -58,7 +58,7 @@ static void errhandler_reg_callbk1 (pmix_status_t status, size_t *ref = (size_t*) cbdata; *ref = errhandler_ref; TEST_VERBOSE(("PMIX client ERRHANDLER REGISTRATION CALLED WITH STATUS %d, ref=%lu", - status, *ref, (unsigned long)errhandler_ref)); + status, (unsigned long)errhandler_ref)); } diff --git a/opal/mca/pmix/pmix3x/pmix/test/test_server.c b/opal/mca/pmix/pmix3x/pmix/test/test_server.c index 426014149ef..30d174a9567 100644 --- a/opal/mca/pmix/pmix3x/pmix/test/test_server.c +++ b/opal/mca/pmix/pmix3x/pmix/test/test_server.c @@ -77,7 +77,7 @@ PMIX_CLASS_INSTANCE(server_nspace_t, nscon, nsdes); static int server_send_procs(void); -static void server_read_cb(evutil_socket_t fd, short event, void *arg); +static void server_read_cb(int fd, short event, void *arg); static int srv_wait_all(double timeout); static int server_fwd_msg(msg_hdr_t *msg_hdr, char *buf, size_t size); static int server_send_msg(msg_hdr_t *msg_hdr, char *data, size_t size); @@ -187,7 +187,7 @@ static void server_unpack_procs(char *buf, size_t size) char *nspace; while ((size_t)(ptr - buf) < size) { - ns_count = (size_t)*ptr; + ns_count = *(size_t *)ptr; ptr += sizeof(size_t); for (i = 0; i < ns_count; i++) { @@ -195,16 +195,16 @@ static void server_unpack_procs(char *buf, size_t size) size_t ltasks, ntasks; int server_id; - server_id = *ptr; + server_id = *(int *)ptr; ptr += sizeof(int); nspace = ptr; ptr += PMIX_MAX_NSLEN+1; - ntasks = (size_t)*ptr; + ntasks 
= *(size_t *)ptr; ptr += sizeof(size_t); - ltasks = (size_t)*ptr; + ltasks = *(size_t *)ptr; ptr += sizeof(size_t); PMIX_LIST_FOREACH(tmp, server_nspace, server_nspace_t) { @@ -226,7 +226,7 @@ static void server_unpack_procs(char *buf, size_t size) } size_t i; for (i = 0; i < ltasks; i++) { - int rank = (int)*ptr; + int rank = *(int *)ptr; ptr += sizeof(int); if (ns_item->task_map[rank] >= 0) { continue; @@ -473,7 +473,7 @@ static void _libpmix_cb(void *cbdata) } } -static void server_read_cb(evutil_socket_t fd, short event, void *arg) +static void server_read_cb(int fd, short event, void *arg) { server_info_t *server = (server_info_t*)arg; msg_hdr_t msg_hdr; @@ -505,8 +505,8 @@ static void server_read_cb(evutil_socket_t fd, short event, void *arg) switch(msg_hdr.cmd) { case CMD_BARRIER_REQUEST: barrier_cnt++; - TEST_VERBOSE(("CMD_BARRIER_REQ req from %d cnt %d", msg_hdr.src_id, - barrier_cnt)); + TEST_VERBOSE(("CMD_BARRIER_REQ req from %d cnt %lu", msg_hdr.src_id, + (unsigned long)barrier_cnt)); if (pmix_list_get_size(server_list) == barrier_cnt) { barrier_cnt = 0; /* reset barrier counter */ server_info_t *tmp_server; @@ -535,8 +535,8 @@ static void server_read_cb(evutil_socket_t fd, short event, void *arg) msg_buf = NULL; } - TEST_VERBOSE(("CMD_FENCE_CONTRIB req from %d cnt %d size %d", - msg_hdr.src_id, contrib_cnt, msg_hdr.size)); + TEST_VERBOSE(("CMD_FENCE_CONTRIB req from %d cnt %lu size %d", + msg_hdr.src_id, (unsigned long)contrib_cnt, msg_hdr.size)); if (pmix_list_get_size(server_list) == contrib_cnt) { server_info_t *tmp_server; PMIX_LIST_FOREACH(tmp_server, server_list, server_info_t) { @@ -547,8 +547,8 @@ static void server_read_cb(evutil_socket_t fd, short event, void *arg) resp_hdr.size = fence_buf_offset; server_send_msg(&resp_hdr, fence_buf, fence_buf_offset); } - TEST_VERBOSE(("CMD_FENCE_CONTRIB complete, size %d", - fence_buf_offset)); + TEST_VERBOSE(("CMD_FENCE_CONTRIB complete, size %lu", + (unsigned long)fence_buf_offset)); if (fence_buf) { 
free(fence_buf); fence_buf = NULL; @@ -651,13 +651,13 @@ static void server_unpack_dmdx(char *buf, int *sender, pmix_proc_t *proc) { char *ptr = buf; - *sender = (int)*ptr; + *sender = *(int *)ptr; ptr += sizeof(int); memcpy(proc->nspace, ptr, PMIX_MAX_NSLEN +1); ptr += PMIX_MAX_NSLEN +1; - proc->rank = (int)*ptr; + proc->rank = *(int *)ptr; ptr += sizeof(int); } @@ -671,6 +671,8 @@ static void _dmdx_cb(int status, char *data, size_t sz, void *cbdata) msg_hdr.src_id = my_server_id; msg_hdr.size = sz; msg_hdr.dst_id = *sender_id; + TEST_VERBOSE(("srv #%d: DMDX RESPONSE: receiver=%d, size=%lu,", + my_server_id, *sender_id, (unsigned long)sz)); free(sender_id); server_send_msg(&msg_hdr, data, sz); @@ -804,9 +806,9 @@ int server_init(test_params *params) if (params->nservers && pmix_list_get_size(server_list)) { server_info_t *server; PMIX_LIST_FOREACH(server, server_list, server_info_t) { - server->evread = event_new(pmix_globals.evbase, server->rd_fd, + server->evread = pmix_event_new(pmix_globals.evbase, server->rd_fd, EV_READ|EV_PERSIST, server_read_cb, server); - event_add(server->evread, NULL); + pmix_event_add(server->evread, NULL); } } From b37c85dcca027abf1da36ad1477fc8687a6e9621 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Thu, 8 Aug 2019 10:45:40 +0900 Subject: [PATCH 62/94] coll/libnbc: fixes ompi ompi_coll_libnbc_request_t parent base ompi_coll_libnbc_request_t on top of ompi_coll_base_nbc_request_t to correctly support the retention of datatypes/operators This fixes a regression introduced in open-mpi/ompi@0fe756d4166eecf2f0ee2598da690c69a7c824c4 Signed-off-by: Gilles Gouaillardet (cherry picked from commit open-mpi/ompi@f8eef0fde99d44653835797b15988ebd3cd3ddc4) --- ompi/mca/coll/libnbc/coll_libnbc_component.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ompi/mca/coll/libnbc/coll_libnbc_component.c b/ompi/mca/coll/libnbc/coll_libnbc_component.c index c5b1656385d..6598972773d 100644 --- 
a/ompi/mca/coll/libnbc/coll_libnbc_component.c +++ b/ompi/mca/coll/libnbc/coll_libnbc_component.c @@ -460,6 +460,6 @@ request_construct(ompi_coll_libnbc_request_t *request) OBJ_CLASS_INSTANCE(ompi_coll_libnbc_request_t, - ompi_request_t, + ompi_coll_base_nbc_request_t, request_construct, NULL); From ae26957619d4a957b3dfe88af3f7652dce3b3590 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Thu, 8 Aug 2019 10:48:06 +0900 Subject: [PATCH 63/94] coll/base: cleanup ompi_coll_base_nbc_request_t elements Since ompi_coll_base_nbc_request_t is to be used in an opal_free_list_t, it must be returned into a "clean" state. So cleanup some data in the callback completion subroutines. This fixes a regression introduced in open-mpi/ompi@0fe756d4166eecf2f0ee2598da690c69a7c824c4 Signed-off-by: Gilles Gouaillardet (cherry picked from commit open-mpi/ompi@0862c409f1094cfccf5dc31ae7579676daa30b86) --- ompi/mca/coll/base/coll_base_util.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/ompi/mca/coll/base/coll_base_util.c b/ompi/mca/coll/base/coll_base_util.c index 57fe14bad20..d414166d67e 100644 --- a/ompi/mca/coll/base/coll_base_util.c +++ b/ompi/mca/coll/base/coll_base_util.c @@ -108,9 +108,11 @@ int ompi_rounddown(int num, int factor) static void release_objs_callback(struct ompi_coll_base_nbc_request_t *request) { if (NULL != request->data.objs.objs[0]) { OBJ_RELEASE(request->data.objs.objs[0]); + request->data.objs.objs[0] = NULL; } if (NULL != request->data.objs.objs[1]) { OBJ_RELEASE(request->data.objs.objs[1]); + request->data.objs.objs[1] = NULL; } } @@ -207,15 +209,21 @@ static void release_vecs_callback(ompi_coll_base_nbc_request_t *request) { } else { scount = rcount = OMPI_COMM_IS_INTER(comm)?ompi_comm_remote_size(comm):ompi_comm_size(comm); } - for (int i=0; i<scount; i++) { - if (NULL != request->data.vecs.stypes && NULL != request->data.vecs.stypes[i]) { - OMPI_DATATYPE_RELEASE(request->data.vecs.stypes[i]); + if (NULL != request->data.vecs.stypes) { + for (int
i=0; i<scount; i++) { + if (NULL != request->data.vecs.stypes[i]) { + OMPI_DATATYPE_RELEASE(request->data.vecs.stypes[i]); + } } + request->data.vecs.stypes = NULL; } - for (int i=0; i<rcount; i++) { - if (NULL != request->data.vecs.rtypes && NULL != request->data.vecs.rtypes[i]) { - OMPI_DATATYPE_RELEASE(request->data.vecs.rtypes[i]); + if (NULL != request->data.vecs.rtypes) { + for (int i=0; i<rcount; i++) { + if (NULL != request->data.vecs.rtypes[i]) { + OMPI_DATATYPE_RELEASE(request->data.vecs.rtypes[i]); + } } + request->data.vecs.rtypes = NULL; } } From 39ec580b76d4e579abdf2b1d7790036a51efa4bf Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 9 Aug 2019 09:57:56 +0900 Subject: [PATCH 64/94] coll/base: only retain datatypes/op if the request has not yet completed a non blocking collective might return ompi_request_null, so we should not retain anything in that case. Signed-off-by: Gilles Gouaillardet (cherry picked from commit open-mpi/ompi@63d3ccde9ddc922737fe3e307cc1af3a70474265) --- ompi/mca/coll/base/coll_base_util.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ompi/mca/coll/base/coll_base_util.c b/ompi/mca/coll/base/coll_base_util.c index d414166d67e..5736c0946ff 100644 --- a/ompi/mca/coll/base/coll_base_util.c +++ b/ompi/mca/coll/base/coll_base_util.c @@ -141,6 +141,9 @@ int ompi_coll_base_retain_op( ompi_request_t *req, ompi_op_t *op, ompi_datatype_t *type) { ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; bool retain = false; + if (REQUEST_COMPLETE(req)) { + return OMPI_SUCCESS; + } if (!ompi_op_is_intrinsic(op)) { OBJ_RETAIN(op); request->data.op.op = op; @@ -177,6 +180,9 @@ int ompi_coll_base_retain_datatypes( ompi_request_t *req, ompi_datatype_t *stype ompi_datatype_t *rtype) { ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req; bool retain = false; + if (REQUEST_COMPLETE(req)) { + return OMPI_SUCCESS; + } if (NULL != stype && !ompi_datatype_is_predefined(stype)) { OBJ_RETAIN(stype); request->data.types.stype = stype; @@ -254,6 +260,9 @@ int ompi_coll_base_retain_datatypes_w(
ompi_request_t *req, bool retain = false; ompi_communicator_t *comm = request->super.req_mpi_object.comm; int scount, rcount; + if (REQUEST_COMPLETE(req)) { + return OMPI_SUCCESS; + } if (OMPI_COMM_IS_TOPO(comm)) { (void)mca_topo_base_neighbor_count (comm, &rcount, &scount); } else { From e9a034378086096c9018386254c6044a3d29924f Mon Sep 17 00:00:00 2001 From: Tomislav Janjusic Date: Fri, 28 Jun 2019 22:34:30 +0300 Subject: [PATCH 65/94] osc/ucx: Fix possible win creation/destruction race condition To avoid fully initializing the osc/ucx component for MPI application that are not using One-Sided functionality, the initialization happens at the first MPI window creation. This commit ensures atomicity of global state modifications. ported from: 6678ac0f557935b291ec2310216b7ea46e0c13b1 Signed-off-by: Artem Polyakov fix alignment, and fix error path --- ompi/mca/osc/ucx/osc_ucx_component.c | 73 +++++++++++++++++++++------- 1 file changed, 55 insertions(+), 18 deletions(-) diff --git a/ompi/mca/osc/ucx/osc_ucx_component.c b/ompi/mca/osc/ucx/osc_ucx_component.c index a5b392568b3..f4032a67151 100644 --- a/ompi/mca/osc/ucx/osc_ucx_component.c +++ b/ompi/mca/osc/ucx/osc_ucx_component.c @@ -21,6 +21,20 @@ memcpy(((char*)(_dst)) + (_off), _src, _len); \ (_off) += (_len); +opal_mutex_t mca_osc_service_mutex = OPAL_MUTEX_STATIC_INIT; +static void _osc_ucx_init_lock(void) +{ + if(mca_osc_ucx_component.enable_mpi_threads) { + opal_mutex_lock(&mca_osc_service_mutex); + } +} +static void _osc_ucx_init_unlock(void) +{ + if(mca_osc_ucx_component.enable_mpi_threads) { + opal_mutex_unlock(&mca_osc_service_mutex); + } +} + static int component_open(void); static int component_register(void); static int component_init(bool enable_progress_threads, bool enable_mpi_threads); @@ -254,6 +268,9 @@ static void ompi_osc_ucx_unregister_progress() { int ret; + /* May be called concurrently - protect */ + _osc_ucx_init_lock(); + mca_osc_ucx_component.num_modules--; 
OSC_UCX_ASSERT(mca_osc_ucx_component.num_modules >= 0); if (0 == mca_osc_ucx_component.num_modules) { @@ -262,6 +279,8 @@ static void ompi_osc_ucx_unregister_progress() OSC_UCX_VERBOSE(1, "opal_progress_unregister failed: %d", ret); } } + + _osc_ucx_init_unlock(); } static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, @@ -295,6 +314,8 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in return OMPI_ERR_NOT_SUPPORTED; } + _osc_ucx_init_lock(); + if (mca_osc_ucx_component.env_initialized == false) { ucp_config_t *config = NULL; ucp_params_t context_params; @@ -304,7 +325,8 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in status = ucp_config_read("MPI", NULL, &config); if (UCS_OK != status) { OSC_UCX_VERBOSE(1, "ucp_config_read failed: %d", status); - return OMPI_ERROR; + ret = OMPI_ERROR; + goto select_unlock; } OBJ_CONSTRUCT(&mca_osc_ucx_component.requests, opal_free_list_t); @@ -315,7 +337,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in 0, 0, 8, 0, 8, NULL, 0, NULL, NULL, NULL); if (OMPI_SUCCESS != ret) { OSC_UCX_VERBOSE(1, "opal_free_list_init failed: %d", ret); - goto error; + goto select_unlock; } /* initialize UCP context */ @@ -337,7 +359,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in if (UCS_OK != status) { OSC_UCX_VERBOSE(1, "ucp_init failed: %d", status); ret = OMPI_ERROR; - goto error; + goto select_unlock; } assert(mca_osc_ucx_component.ucp_worker == NULL); @@ -349,8 +371,8 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in &(mca_osc_ucx_component.ucp_worker)); if (UCS_OK != status) { OSC_UCX_VERBOSE(1, "ucp_worker_create failed: %d", status); - ret = OMPI_ERROR; - goto error_nomem; + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; + goto select_unlock; } /* query UCP worker attributes */ @@ -358,20 +380,44 @@ static int component_select(struct 
ompi_win_t *win, void **base, size_t size, in status = ucp_worker_query(mca_osc_ucx_component.ucp_worker, &worker_attr); if (UCS_OK != status) { OSC_UCX_VERBOSE(1, "ucp_worker_query failed: %d", status); - ret = OMPI_ERROR; - goto error_nomem; + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; + goto select_unlock; } if (mca_osc_ucx_component.enable_mpi_threads == true && worker_attr.thread_mode != UCS_THREAD_MODE_MULTI) { OSC_UCX_VERBOSE(1, "ucx does not support multithreading"); - ret = OMPI_ERROR; - goto error_nomem; + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; + goto select_unlock; } mca_osc_ucx_component.env_initialized = true; env_initialized = true; } + + mca_osc_ucx_component.num_modules++; + + OSC_UCX_ASSERT(mca_osc_ucx_component.num_modules > 0); + if (1 == mca_osc_ucx_component.num_modules) { + ret = opal_progress_register(progress_callback); + if (OMPI_SUCCESS != ret) { + OSC_UCX_VERBOSE(1, "opal_progress_register failed: %d", ret); + goto select_unlock; + } + } + +select_unlock: + _osc_ucx_init_unlock(); + switch(ret) { + case OMPI_SUCCESS: + break; + case OMPI_ERROR: + goto error; + case OMPI_ERR_TEMP_OUT_OF_RESOURCE: + goto error_nomem; + default: + goto error; + } /* create module structure */ module = (ompi_osc_ucx_module_t *)calloc(1, sizeof(ompi_osc_ucx_module_t)); @@ -380,7 +426,6 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in goto error_nomem; } - mca_osc_ucx_component.num_modules++; /* fill in the function pointer part */ memcpy(module, &ompi_osc_ucx_module_template, sizeof(ompi_osc_base_module_t)); @@ -648,14 +693,6 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in goto error; } - OSC_UCX_ASSERT(mca_osc_ucx_component.num_modules > 0); - if (1 == mca_osc_ucx_component.num_modules) { - ret = opal_progress_register(progress_callback); - if (OMPI_SUCCESS != ret) { - OSC_UCX_VERBOSE(1, "opal_progress_register failed: %d", ret); - goto error; - } - } return ret; error: From 
14f3fbb8c1d52671194042b0c83e3a360d9fb88c Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 22 Jul 2019 07:28:54 -0700 Subject: [PATCH 66/94] Provide locality for all procs on node Update PMIx to latest master to get supporting updates. For connect/accept (part of comm_spawn as well), lookup locality for all participating procs on the node and compute the relative locality so it can be used for MPI operations. Signed-off-by: Ralph Castain (cherry picked from commit d202e10c1407d2f9177e9b871eadde1f25526676) --- ompi/dpm/dpm.c | 73 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 65 insertions(+), 8 deletions(-) diff --git a/ompi/dpm/dpm.c b/ompi/dpm/dpm.c index a9a2de586c4..bc3ad8e002e 100644 --- a/ompi/dpm/dpm.c +++ b/ompi/dpm/dpm.c @@ -15,7 +15,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -404,9 +404,43 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, goto exit; } if (0 < opal_list_get_size(&ilist)) { + uint32_t *peer_ranks = NULL; + int prn, nprn; + char *val, *mycpuset; + uint16_t u16; + opal_process_name_t wildcard_rank; /* convert the list of new procs to a proc_t array */ new_proc_list = (ompi_proc_t**)calloc(opal_list_get_size(&ilist), sizeof(ompi_proc_t *)); + /* get the list of local peers for the new procs */ + cd = (ompi_dpm_proct_caddy_t*)opal_list_get_first(&ilist); + proc = cd->p; + wildcard_rank.jobid = proc->super.proc_name.jobid; + wildcard_rank.vpid = OMPI_NAME_WILDCARD->vpid; + /* retrieve the local peers */ + OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCAL_PEERS, + &wildcard_rank, &val, OPAL_STRING); + if (OPAL_SUCCESS == rc && NULL != val) { + char **peers = opal_argv_split(val, ','); + free(val); + nprn = opal_argv_count(peers); + peer_ranks = (uint32_t*)calloc(nprn, sizeof(uint32_t)); + for (prn = 0; NULL != peers[prn]; prn++) { + peer_ranks[prn] = strtoul(peers[prn], NULL, 10); + } + opal_argv_free(peers); + } + + /* get my locality string */ + val = NULL; + OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCALITY_STRING, + OMPI_PROC_MY_NAME, &val, OPAL_STRING); + if (OPAL_SUCCESS == rc && NULL != val) { + mycpuset = val; + } else { + mycpuset = NULL; + } + i = 0; OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) { opal_value_t *kv; @@ -416,15 +450,38 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, * OPAL_PMIX_LOCALITY and OPAL_PMIX_HOSTNAME. 
since we can live without * them, we are just fine */ ompi_proc_complete_init_single(proc); - /* save the locality for later */ - kv = OBJ_NEW(opal_value_t); - kv->key = strdup(OPAL_PMIX_LOCALITY); - kv->type = OPAL_UINT16; - kv->data.uint16 = proc->super.proc_flags; - opal_pmix.store_local(&proc->super.proc_name, kv); - OBJ_RELEASE(kv); // maintain accounting + /* if this proc is local, then get its locality */ + if (NULL != peer_ranks) { + for (prn=0; prn < nprn; prn++) { + if (peer_ranks[prn] == proc->super.proc_name.vpid) { + /* get their locality string */ + val = NULL; + OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, OPAL_PMIX_LOCALITY_STRING, + &proc->super.proc_name, &val, OPAL_STRING); + if (OPAL_SUCCESS == rc && NULL != val) { + u16 = opal_hwloc_compute_relative_locality(mycpuset, val); + free(val); + } else { + /* all we can say is that it shares our node */ + u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; + } + proc->super.proc_flags = u16; + /* save the locality for later */ + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_LOCALITY); + kv->type = OPAL_UINT16; + kv->data.uint16 = proc->super.proc_flags; + opal_pmix.store_local(&proc->super.proc_name, kv); + OBJ_RELEASE(kv); // maintain accounting + break; + } + } + } ++i; } + if (NULL != mycpuset) { + free(mycpuset); + } /* call add_procs on the new ones */ rc = MCA_PML_CALL(add_procs(new_proc_list, opal_list_get_size(&ilist))); free(new_proc_list); From e17203b4f7f07f128177fe9f8cea30697f13d390 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 12 Aug 2019 12:24:05 -0700 Subject: [PATCH 67/94] Silence Coverity warning Signed-off-by: Ralph Castain --- ompi/dpm/dpm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ompi/dpm/dpm.c b/ompi/dpm/dpm.c index bc3ad8e002e..9fb42dcce08 100644 --- a/ompi/dpm/dpm.c +++ b/ompi/dpm/dpm.c @@ -482,6 +482,9 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, if (NULL != mycpuset) { free(mycpuset); } + if (NULL != peer_ranks) { + 
free(peer_ranks); + } /* call add_procs on the new ones */ rc = MCA_PML_CALL(add_procs(new_proc_list, opal_list_get_size(&ilist))); free(new_proc_list); From d72d39bfee83977094f92f9fc1d7346c5e22e633 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Mon, 5 Aug 2019 15:56:25 -0500 Subject: [PATCH 68/94] io_ompio_file_open: fix offset calculation with SEEK_END and SEEK_CUR. fixes an issue reported by Wei-keng Liao Fixes Issue #6858 Signed-off-by: Edgar Gabriel --- ompi/mca/io/ompio/io_ompio_file_open.c | 48 ++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/ompi/mca/io/ompio/io_ompio_file_open.c b/ompi/mca/io/ompio/io_ompio_file_open.c index 37bc8fea572..dbb62d718cc 100644 --- a/ompi/mca/io/ompio/io_ompio_file_open.c +++ b/ompi/mca/io/ompio/io_ompio_file_open.c @@ -385,6 +385,45 @@ int mca_io_ompio_file_sync (ompi_file_t *fh) return ret; } +static void mca_io_ompio_file_get_eof_offset (ompio_file_t *fh, + OMPI_MPI_OFFSET_TYPE in_offset, + OMPI_MPI_OFFSET_TYPE *out_offset) +{ + /* a file_seek with SEEK_END might require an actual offset that is + not lined up with the end of the file, depending on the file view. + This routine determines the closest (smaller or equal) offset to + the provided in_offset value, avoiding gaps in the file view and avoiding to + break up an etype. 
+ */ + OMPI_MPI_OFFSET_TYPE offset=0, prev_offset=0, start_offset=0; + size_t k=0, blocklen=0; + size_t index_in_file_view=0; + + in_offset -= fh->f_disp; + if ( fh->f_view_size > 0 ) { + /* starting offset of the current copy of the filew view */ + start_offset = in_offset / fh->f_view_extent; + + index_in_file_view = 0; + /* determine block id that the offset is located in and + the starting offset of that block */ + while ( offset <= in_offset && index_in_file_view < fh->f_iov_count) { + prev_offset = offset; + offset = start_offset + (OMPI_MPI_OFFSET_TYPE)(intptr_t) fh->f_decoded_iov[index_in_file_view++].iov_base; + } + + offset = prev_offset; + blocklen = fh->f_decoded_iov[index_in_file_view-1].iov_len; + while ( offset <= in_offset && k <= blocklen ) { + prev_offset = offset; + offset += fh->f_etype_size; + k += fh->f_etype_size; + } + + *out_offset = prev_offset; + } + return; +} int mca_io_ompio_file_seek (ompi_file_t *fh, OMPI_MPI_OFFSET_TYPE off, @@ -392,7 +431,7 @@ int mca_io_ompio_file_seek (ompi_file_t *fh, { int ret = OMPI_SUCCESS; mca_common_ompio_data_t *data; - OMPI_MPI_OFFSET_TYPE offset, temp_offset; + OMPI_MPI_OFFSET_TYPE offset, temp_offset, temp_offset2; data = (mca_common_ompio_data_t *) fh->f_io_selected_data; @@ -409,7 +448,7 @@ int mca_io_ompio_file_seek (ompi_file_t *fh, case MPI_SEEK_CUR: ret = mca_common_ompio_file_get_position (&data->ompio_fh, &temp_offset); - offset += temp_offset; + offset += temp_offset * data->ompio_fh.f_etype_size; if (offset < 0) { OPAL_THREAD_UNLOCK(&fh->f_lock); return OMPI_ERROR; @@ -417,7 +456,9 @@ int mca_io_ompio_file_seek (ompi_file_t *fh, break; case MPI_SEEK_END: ret = data->ompio_fh.f_fs->fs_file_get_size (&data->ompio_fh, - &temp_offset); + &temp_offset2); + mca_io_ompio_file_get_eof_offset (&data->ompio_fh, + temp_offset2, &temp_offset); offset += temp_offset; if (offset < 0 || OMPI_SUCCESS != ret) { OPAL_THREAD_UNLOCK(&fh->f_lock); @@ -436,6 +477,7 @@ int mca_io_ompio_file_seek (ompi_file_t *fh, 
return ret; } + int mca_io_ompio_file_get_position (ompi_file_t *fd, OMPI_MPI_OFFSET_TYPE *offset) { From 83d40c1e14c370bea685314988371856c9ad57ae Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 14 Aug 2019 01:05:28 -0400 Subject: [PATCH 69/94] Fix the stack displacement. Fixes the convertor iovec description on the MPI-IO reported by Edgar. Signed-off-by: George Bosilca --- opal/datatype/opal_convertor_raw.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/opal/datatype/opal_convertor_raw.c b/opal/datatype/opal_convertor_raw.c index 893792583f9..3d22cd792a3 100644 --- a/opal/datatype/opal_convertor_raw.c +++ b/opal/datatype/opal_convertor_raw.c @@ -126,8 +126,8 @@ opal_convertor_raw( opal_convertor_t* pConvertor, const ddt_elem_desc_t* current = &(pElem->elem); if( count_desc != (current->count * current->blocklen) ) { /* Not the full element description */ - do_now = current->blocklen - (count_desc % current->blocklen); /* how much left in the block */ - if( do_now ) { + if( (do_now = count_desc % current->blocklen) ) { + do_now = current->blocklen - do_now; /* how much left in the block */ source_base += current->disp; blength = do_now * opal_datatype_basicDatatypes[current->common.type]->size; OPAL_DATATYPE_SAFEGUARD_POINTER( source_base, blength, pConvertor->pBaseBuf, @@ -136,12 +136,12 @@ opal_convertor_raw( opal_convertor_t* pConvertor, index, (void*)source_base, blength ); ); opal_convertor_merge_iov( iov, iov_count, (IOVBASE_TYPE *) source_base, blength, &index ); - /* not check the return value, we know there was at least one element in the iovec */ + /* ignore the return value, we know there was at least one element in the iovec */ sum_iov_len += blength; count_desc -= do_now; - source_base += (current->extent - current->disp + - (current->blocklen - do_now) * opal_datatype_basicDatatypes[current->common.type]->size); + source_base += (blength - current->blocklen * 
opal_datatype_basicDatatypes[current->common.type]->size + + current->extent - current->disp); } } } @@ -258,7 +258,7 @@ opal_convertor_raw( opal_convertor_t* pConvertor, } /* I complete an element, next step I should go to the next one */ PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_UINT1, count_desc, - source_base - pStack->disp - pConvertor->pBaseBuf ); + source_base - pConvertor->pBaseBuf ); DO_DEBUG( opal_output( 0, "raw save stack stack_pos %d pos_desc %d count_desc %" PRIsize_t " disp %ld\n", pConvertor->stack_pos, pStack->index, pStack->count, (long)pStack->disp ); ); return 0; From 8e6e826b54725ded4f15f9677ff0a11f80ee3e6c Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 14 Aug 2019 10:59:50 -0400 Subject: [PATCH 70/94] Fix the variable names used for the datatype dump. Signed-off-by: George Bosilca --- opal/datatype/opal_convertor_internal.h | 5 --- opal/datatype/opal_convertor_raw.c | 2 +- opal/datatype/opal_datatype_copy.c | 2 +- opal/datatype/opal_datatype_internal.h | 9 +++-- opal/datatype/opal_datatype_module.c | 51 +++++++++++++++---------- opal/datatype/opal_datatype_pack.c | 4 +- opal/datatype/opal_datatype_position.c | 2 +- opal/datatype/opal_datatype_unpack.c | 2 +- 8 files changed, 41 insertions(+), 36 deletions(-) diff --git a/opal/datatype/opal_convertor_internal.h b/opal/datatype/opal_convertor_internal.h index 025633cb7e7..39690f5bd19 100644 --- a/opal/datatype/opal_convertor_internal.h +++ b/opal/datatype/opal_convertor_internal.h @@ -50,11 +50,6 @@ opal_convertor_master_t* opal_convertor_find_or_create_master( uint32_t remote_a void opal_convertor_destroy_masters( void ); -#if OPAL_ENABLE_DEBUG -extern bool opal_pack_debug; -extern bool opal_unpack_debug; -#endif /* OPAL_ENABLE_DEBUG */ - END_C_DECLS #endif /* OPAL_CONVERTOR_INTERNAL_HAS_BEEN_INCLUDED */ diff --git a/opal/datatype/opal_convertor_raw.c b/opal/datatype/opal_convertor_raw.c index 3d22cd792a3..5bea5dcf5b8 100644 --- 
a/opal/datatype/opal_convertor_raw.c +++ b/opal/datatype/opal_convertor_raw.c @@ -25,7 +25,7 @@ #if OPAL_ENABLE_DEBUG #include "opal/util/output.h" -#define DO_DEBUG(INST) if( opal_pack_debug ) { INST } +#define DO_DEBUG(INST) if( opal_ddt_raw_debug ) { INST } #else #define DO_DEBUG(INST) #endif /* OPAL_ENABLE_DEBUG */ diff --git a/opal/datatype/opal_datatype_copy.c b/opal/datatype/opal_datatype_copy.c index 7bf94ef97b9..c70bdd24dfa 100644 --- a/opal/datatype/opal_datatype_copy.c +++ b/opal/datatype/opal_datatype_copy.c @@ -36,7 +36,7 @@ #if OPAL_ENABLE_DEBUG -#define DO_DEBUG(INST) if( opal_copy_debug ) { INST } +#define DO_DEBUG(INST) if( opal_ddt_copy_debug ) { INST } #else #define DO_DEBUG(INST) #endif /* OPAL_ENABLE_DEBUG */ diff --git a/opal/datatype/opal_datatype_internal.h b/opal/datatype/opal_datatype_internal.h index 1f10c9138aa..bdeb0cc429e 100644 --- a/opal/datatype/opal_datatype_internal.h +++ b/opal/datatype/opal_datatype_internal.h @@ -496,10 +496,11 @@ OPAL_DECLSPEC int opal_datatype_contain_basic_datatypes( const struct opal_datat OPAL_DECLSPEC int opal_datatype_dump_data_flags( unsigned short usflags, char* ptr, size_t length ); OPAL_DECLSPEC int opal_datatype_dump_data_desc( union dt_elem_desc* pDesc, int nbElems, char* ptr, size_t length ); -extern bool opal_position_debug; -extern bool opal_copy_debug; -extern bool opal_unpack_debug; -extern bool opal_pack_debug; +extern bool opal_ddt_position_debug; +extern bool opal_ddt_copy_debug; +extern bool opal_ddt_unpack_debug; +extern bool opal_ddt_pack_debug; +extern bool opal_ddt_raw_debug; END_C_DECLS #endif /* OPAL_DATATYPE_INTERNAL_H_HAS_BEEN_INCLUDED */ diff --git a/opal/datatype/opal_datatype_module.c b/opal/datatype/opal_datatype_module.c index d4415b21ef1..ba933b5fe2b 100644 --- a/opal/datatype/opal_datatype_module.c +++ b/opal/datatype/opal_datatype_module.c @@ -37,10 +37,11 @@ /* by default the debuging is turned off */ int opal_datatype_dfd = -1; -bool opal_unpack_debug = false; -bool 
opal_pack_debug = false; -bool opal_position_debug = false; -bool opal_copy_debug = false; +bool opal_ddt_unpack_debug = false; +bool opal_ddt_pack_debug = false; +bool opal_ddt_position_debug = false; +bool opal_ddt_copy_debug = false; +bool opal_ddt_raw_debug = false; int opal_ddt_verbose = -1; /* Has the datatype verbose it's own output stream */ extern int opal_cuda_verbose; @@ -148,35 +149,43 @@ int opal_datatype_register_params(void) int ret; ret = mca_base_var_register ("opal", "mpi", NULL, "ddt_unpack_debug", - "Whether to output debugging information in the ddt unpack functions (nonzero = enabled)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_LOCAL, &opal_unpack_debug); + "Whether to output debugging information in the ddt unpack functions (nonzero = enabled)", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, &opal_ddt_unpack_debug); if (0 > ret) { - return ret; + return ret; } ret = mca_base_var_register ("opal", "mpi", NULL, "ddt_pack_debug", - "Whether to output debugging information in the ddt pack functions (nonzero = enabled)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_LOCAL, &opal_pack_debug); + "Whether to output debugging information in the ddt pack functions (nonzero = enabled)", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, &opal_ddt_pack_debug); if (0 > ret) { - return ret; + return ret; + } + + ret = mca_base_var_register ("opal", "mpi", NULL, "ddt_raw_debug", + "Whether to output debugging information in the ddt raw functions (nonzero = enabled)", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, &opal_ddt_raw_debug); + if (0 > ret) { + return ret; } ret = mca_base_var_register ("opal", "mpi", NULL, "ddt_position_debug", - "Non zero lead to output 
generated by the datatype position functions", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_LOCAL, &opal_position_debug); + "Non zero lead to output generated by the datatype position functions", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, &opal_ddt_position_debug); if (0 > ret) { - return ret; + return ret; } ret = mca_base_var_register ("opal", "mpi", NULL, "ddt_copy_debug", - "Whether to output debugging information in the ddt copy functions (nonzero = enabled)", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_LOCAL, &opal_copy_debug); + "Whether to output debugging information in the ddt copy functions (nonzero = enabled)", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, &opal_ddt_copy_debug); if (0 > ret) { - return ret; + return ret; } ret = mca_base_var_register ("opal", "opal", NULL, "ddt_verbose", @@ -195,7 +204,7 @@ int opal_datatype_register_params(void) OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_LOCAL, &opal_cuda_verbose); if (0 > ret) { - return ret; + return ret; } #endif diff --git a/opal/datatype/opal_datatype_pack.c b/opal/datatype/opal_datatype_pack.c index c0ab6df66d8..f21adcccb34 100644 --- a/opal/datatype/opal_datatype_pack.c +++ b/opal/datatype/opal_datatype_pack.c @@ -31,7 +31,7 @@ #if OPAL_ENABLE_DEBUG #include "opal/util/output.h" -#define DO_DEBUG(INST) if( opal_pack_debug ) { INST } +#define DO_DEBUG(INST) if( opal_ddt_pack_debug ) { INST } #else #define DO_DEBUG(INST) #endif /* OPAL_ENABLE_DEBUG */ @@ -272,7 +272,7 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor, for( iov_count = 0; iov_count < (*out_size); iov_count++ ) { iov_ptr = (unsigned char *) iov[iov_count].iov_base; iov_len_local = iov[iov_count].iov_len; - + if( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { if( 
(pElem->elem.count * pElem->elem.blocklen) != count_desc ) { /* we have a partial (less than blocklen) basic datatype */ diff --git a/opal/datatype/opal_datatype_position.c b/opal/datatype/opal_datatype_position.c index 204d670a3ef..02ec55651a0 100644 --- a/opal/datatype/opal_datatype_position.c +++ b/opal/datatype/opal_datatype_position.c @@ -33,7 +33,7 @@ #if OPAL_ENABLE_DEBUG #include "opal/util/output.h" -#define DO_DEBUG(INST) if( opal_position_debug ) { INST } +#define DO_DEBUG(INST) if( opal_ddt_position_debug ) { INST } #else #define DO_DEBUG(INST) #endif /* OPAL_ENABLE_DEBUG */ diff --git a/opal/datatype/opal_datatype_unpack.c b/opal/datatype/opal_datatype_unpack.c index dca07796d99..0925bde736d 100644 --- a/opal/datatype/opal_datatype_unpack.c +++ b/opal/datatype/opal_datatype_unpack.c @@ -33,7 +33,7 @@ #if OPAL_ENABLE_DEBUG #include "opal/util/output.h" -#define DO_DEBUG(INST) if( opal_unpack_debug ) { INST } +#define DO_DEBUG(INST) if( opal_ddt_unpack_debug ) { INST } #else #define DO_DEBUG(INST) #endif /* OPAL_ENABLE_DEBUG */ From c9f48e2e77dbf4928b2d6c18f2576557c112cc1c Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 14 Aug 2019 11:06:47 -0400 Subject: [PATCH 71/94] Whitespace cleanup No code or logic changes. 
Signed-off-by: George Bosilca Signed-off-by: Jeff Squyres --- ompi/mca/common/monitoring/common_monitoring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ompi/mca/common/monitoring/common_monitoring.c b/ompi/mca/common/monitoring/common_monitoring.c index e521ca56417..ff252bf944f 100644 --- a/ompi/mca/common/monitoring/common_monitoring.c +++ b/ompi/mca/common/monitoring/common_monitoring.c @@ -268,7 +268,7 @@ void mca_common_monitoring_register(void*pml_monitoring_component) &mca_common_monitoring_enabled); mca_common_monitoring_current_state = mca_common_monitoring_enabled; - + (void)mca_base_var_register("ompi", "pml", "monitoring", "enable_output", "Enable the PML monitoring textual output at MPI_Finalize " "(it will be automatically turned off when MPIT is used to " @@ -278,7 +278,7 @@ void mca_common_monitoring_register(void*pml_monitoring_component) MCA_BASE_VAR_FLAG_DWG, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_common_monitoring_output_enabled); - + (void)mca_base_var_register("ompi", "pml", "monitoring", "filename", /*&mca_common_monitoring_component.pmlm_version, "filename",*/ "The name of the file where the monitoring information " @@ -292,7 +292,7 @@ void mca_common_monitoring_register(void*pml_monitoring_component) /* Now that the MCA variables are automatically unregistered when * their component close, we need to keep a safe copy of the - * filename. + * filename. * Keep the copy completely separated in order to let the initial * filename to be handled by the framework. It's easier to deal * with the string lifetime. From f49c22af6d9c4215f8d97f388ca47961c46015b2 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Mon, 12 Aug 2019 16:15:42 -0700 Subject: [PATCH 72/94] Run-as-root env vars in orterun.c I found that I needed to apply the same change as #5597 to orterun.c for the environment variables to work correctly. 
Signed-off-by: Simon Byrne (cherry picked from commit 9c8671c48b946f4387cddb6a66aaab572fa983dd) --- orte/tools/orterun/orterun.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 7f80b147aed..2c436c69db3 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -142,6 +142,14 @@ int orterun(int argc, char *argv[]) * exit with a giant warning flag */ if (0 == geteuid() && !orte_cmd_options.run_as_root) { + char *r1, *r2; + if (NULL != (r1 = getenv("OMPI_ALLOW_RUN_AS_ROOT")) && + NULL != (r2 = getenv("OMPI_ALLOW_RUN_AS_ROOT_CONFIRM"))) { + if (0 == strcmp(r1, "1") && 0 == strcmp(r2, "1")) { + goto moveon; + } + } + fprintf(stderr, "--------------------------------------------------------------------------\n"); if (NULL != orte_cmd_options.help) { fprintf(stderr, "%s cannot provide the help message when run as root.\n", orte_basename); @@ -159,6 +167,7 @@ int orterun(int argc, char *argv[]) exit(1); } + moveon: /* setup to listen for commands sent specifically to me, even though I would probably * be the one sending them! Unfortunately, since I am a participating daemon, * there are times I need to send a command to "all daemons", and that means *I* have From 549abeaa8743db7a8cac77c6291549d92696f8aa Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Mon, 19 Aug 2019 15:36:59 -0400 Subject: [PATCH 73/94] orterun: remove duplicate code https://github.com/open-mpi/ompi/pull/6895 fixed the code in orterun.c to allow running as root if both OMPI_ALLOW_RUN_AS_ROOT and OMPI_ALLOW_RUN_AS_ROOT_CONFIRM env vars are set. However, this env-var-checking code already exists in orte_submit.c:orte_submit_init() -- it looks like the geteuid()/getenv()-checking code here in orterun is now duplicate code. So let's just get rid of the duplicate code. 
Signed-off-by: Jeff Squyres (cherry picked from commit 197beb30d555922b084ac3b89bb97321bf157e88) --- orte/tools/orterun/orterun.c | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 2c436c69db3..3a7abc6a361 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -137,37 +137,6 @@ int orterun(int argc, char *argv[]) exit(1); } - /* check if we are running as root - if we are, then only allow - * us to proceed if the allow-run-as-root flag was given. Otherwise, - * exit with a giant warning flag - */ - if (0 == geteuid() && !orte_cmd_options.run_as_root) { - char *r1, *r2; - if (NULL != (r1 = getenv("OMPI_ALLOW_RUN_AS_ROOT")) && - NULL != (r2 = getenv("OMPI_ALLOW_RUN_AS_ROOT_CONFIRM"))) { - if (0 == strcmp(r1, "1") && 0 == strcmp(r2, "1")) { - goto moveon; - } - } - - fprintf(stderr, "--------------------------------------------------------------------------\n"); - if (NULL != orte_cmd_options.help) { - fprintf(stderr, "%s cannot provide the help message when run as root.\n", orte_basename); - } else { - /* show_help is not yet available, so print an error manually */ - fprintf(stderr, "%s has detected an attempt to run as root.\n", orte_basename); - } - fprintf(stderr, "Running at root is *strongly* discouraged as any mistake (e.g., in\n"); - fprintf(stderr, "defining TMPDIR) or bug can result in catastrophic damage to the OS\n"); - fprintf(stderr, "file system, leaving your system in an unusable state.\n\n"); - fprintf(stderr, "You can override this protection by adding the --allow-run-as-root\n"); - fprintf(stderr, "option to your cmd line. 
However, we reiterate our strong advice\n"); - fprintf(stderr, "against doing so - please do so at your own risk.\n"); - fprintf(stderr, "--------------------------------------------------------------------------\n"); - exit(1); - } - - moveon: /* setup to listen for commands sent specifically to me, even though I would probably * be the one sending them! Unfortunately, since I am a participating daemon, * there are times I need to send a command to "all daemons", and that means *I* have From 66e18563bf4867763b63e520fd355b9ed6a6d0b6 Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Wed, 21 Aug 2019 12:01:55 +0300 Subject: [PATCH 74/94] SPML/UCX: fixed hang in SHMEM_FINALIZE - used MPI _Barrier to synchronize processes Signed-off-by: Sergey Oblomov (cherry picked from commit 182023febb6f8f31ce34dc54c8aa409ad7e44fa2) --- opal/mca/common/ucx/common_ucx.c | 7 +++++-- oshmem/mca/spml/ucx/spml_ucx.c | 12 ++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/opal/mca/common/ucx/common_ucx.c b/opal/mca/common/ucx/common_ucx.c index a3a12a8fa88..bf5d6c04943 100644 --- a/opal/mca/common/ucx/common_ucx.c +++ b/opal/mca/common/ucx/common_ucx.c @@ -186,8 +186,11 @@ static void opal_common_ucx_wait_all_requests(void **reqs, int count, ucp_worker } } -OPAL_DECLSPEC int opal_common_ucx_del_procs_nofence(opal_common_ucx_del_proc_t *procs, size_t count, - size_t my_rank, size_t max_disconnect, ucp_worker_h worker) { +OPAL_DECLSPEC int opal_common_ucx_del_procs_nofence(opal_common_ucx_del_proc_t *procs, + size_t count, size_t my_rank, + size_t max_disconnect, + ucp_worker_h worker) +{ size_t num_reqs; size_t max_reqs; void *dreq, **dreqs; diff --git a/oshmem/mca/spml/ucx/spml_ucx.c b/oshmem/mca/spml/ucx/spml_ucx.c index 4a0dd121d89..44ad1b4f095 100644 --- a/oshmem/mca/spml/ucx/spml_ucx.c +++ b/oshmem/mca/spml/ucx/spml_ucx.c @@ -127,18 +127,16 @@ int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs) mca_spml_ucx_ctx_default.ucp_peers[i].ucp_conn = 
NULL; } - ret = opal_common_ucx_del_procs(del_procs, nprocs, oshmem_my_proc_id(), - mca_spml_ucx.num_disconnect, - mca_spml_ucx_ctx_default.ucp_worker); - + ret = opal_common_ucx_del_procs_nofence(del_procs, nprocs, oshmem_my_proc_id(), + mca_spml_ucx.num_disconnect, + mca_spml_ucx_ctx_default.ucp_worker); + /* Do not barrier here - barrier is called in _shmem_finalize */ free(del_procs); free(mca_spml_ucx.remote_addrs_tbl); free(mca_spml_ucx_ctx_default.ucp_peers); mca_spml_ucx_ctx_default.ucp_peers = NULL; - opal_common_ucx_mca_proc_added(); - return ret; } @@ -326,6 +324,8 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) free(wk_roffs); SPML_UCX_VERBOSE(50, "*** ADDED PROCS ***"); + + opal_common_ucx_mca_proc_added(); return OSHMEM_SUCCESS; error2: From 1f9fce8955c3ba904daace618661e936c34f010b Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Wed, 21 Aug 2019 12:08:09 +0300 Subject: [PATCH 75/94] SPML/UCX: fixed comment Signed-off-by: Sergey Oblomov (cherry picked from commit 01dacaa6a42b35c1b7538d8ff0036bded913c847) --- oshmem/mca/spml/ucx/spml_ucx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oshmem/mca/spml/ucx/spml_ucx.c b/oshmem/mca/spml/ucx/spml_ucx.c index 44ad1b4f095..36d3467bf5c 100644 --- a/oshmem/mca/spml/ucx/spml_ucx.c +++ b/oshmem/mca/spml/ucx/spml_ucx.c @@ -130,7 +130,7 @@ int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs) ret = opal_common_ucx_del_procs_nofence(del_procs, nprocs, oshmem_my_proc_id(), mca_spml_ucx.num_disconnect, mca_spml_ucx_ctx_default.ucp_worker); - /* Do not barrier here - barrier is called in _shmem_finalize */ + /* No need to barrier here - barrier is called in _shmem_finalize */ free(del_procs); free(mca_spml_ucx.remote_addrs_tbl); free(mca_spml_ucx_ctx_default.ucp_peers); From b07d58a0fec15b5c86bf20a056dc237691ae448f Mon Sep 17 00:00:00 2001 From: Geoffrey Paulsen Date: Fri, 23 Aug 2019 16:36:10 -0400 Subject: [PATCH 76/94] Updating VERSION for v4.0.2rc1 Signed-off-by: 
Geoffrey Paulsen --- VERSION | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/VERSION b/VERSION index ea6b0bc3c1c..1b15d265b58 100644 --- a/VERSION +++ b/VERSION @@ -5,7 +5,9 @@ # All rights reserved. # Copyright (c) 2017 Los Alamos National Security, LLC. All rights # reserved. -# Copyright (c) 2018 IBM Corporation. All rights reserved. +# Copyright (c) 2019 Triad National Security, LLC. All rights +# reserved. +# Copyright (c) 2018-2019 IBM Corporation. All rights reserved. # Copyright (c) 2018 Research Organization for Information Science # and Technology (RIST). All rights reserved. @@ -28,7 +30,7 @@ release=2 # requirement is that it must be entirely printable ASCII characters # and have no white space. -greek=a1 +greek=rc1 # If repo_rev is empty, then the repository version number will be # obtained during "make dist" via the "git describe --tags --always" @@ -86,16 +88,16 @@ date="Unreleased developer copy" # Version numbers are described in the Libtool current:revision:age # format. -libmpi_so_version=60:1:20 -libmpi_cxx_so_version=60:0:20 -libmpi_mpifh_so_version=60:1:20 +libmpi_so_version=60:2:20 +libmpi_cxx_so_version=60:1:20 +libmpi_mpifh_so_version=60:2:20 libmpi_usempi_tkr_so_version=60:0:20 libmpi_usempi_ignore_tkr_so_version=60:0:20 -libmpi_usempif08_so_version=60:0:20 -libopen_rte_so_version=60:1:20 -libopen_pal_so_version=60:1:20 +libmpi_usempif08_so_version=60:1:20 +libopen_rte_so_version=60:2:20 +libopen_pal_so_version=60:2:20 libmpi_java_so_version=60:0:20 -liboshmem_so_version=61:0:21 +liboshmem_so_version=62:0:22 libompitrace_so_version=60:0:20 # "Common" components install standalone libraries that are run-time @@ -104,7 +106,7 @@ libompitrace_so_version=60:0:20 # components-don't-affect-the-build-system abstraction. 
# OMPI layer -libmca_ompi_common_ompio_so_version=60:1:19 +libmca_ompi_common_ompio_so_version=60:2:19 libmca_ompi_common_monitoring_so_version=60:0:10 # ORTE layer @@ -113,6 +115,6 @@ libmca_orte_common_alps_so_version=60:0:20 # OPAL layer libmca_opal_common_cuda_so_version=60:0:20 libmca_opal_common_sm_so_version=60:0:20 -libmca_opal_common_ucx_so_version=60:0:20 +libmca_opal_common_ucx_so_version=60:1:20 libmca_opal_common_ugni_so_version=60:0:20 libmca_opal_common_verbs_so_version=60:0:20 From 1b0cfdf264c793bb5f0fe2714cbbdfc6f54b46eb Mon Sep 17 00:00:00 2001 From: Scott Miller Date: Tue, 20 Aug 2019 14:46:16 -0400 Subject: [PATCH 77/94] v4.0.x: regx/naive: add regx/naive component Signed-off-by: Scott Miller --- orte/mca/regx/fwd/regx_fwd.c | 4 + orte/mca/regx/naive/Makefile.am | 36 ++++ orte/mca/regx/naive/owner.txt | 7 + orte/mca/regx/naive/regx_naive.c | 181 +++++++++++++++++++++ orte/mca/regx/naive/regx_naive.h | 34 ++++ orte/mca/regx/naive/regx_naive_component.c | 62 +++++++ orte/mca/regx/reverse/regx_reverse.c | 4 + 7 files changed, 328 insertions(+) create mode 100644 orte/mca/regx/naive/Makefile.am create mode 100644 orte/mca/regx/naive/owner.txt create mode 100644 orte/mca/regx/naive/regx_naive.c create mode 100644 orte/mca/regx/naive/regx_naive.h create mode 100644 orte/mca/regx/naive/regx_naive_component.c diff --git a/orte/mca/regx/fwd/regx_fwd.c b/orte/mca/regx/fwd/regx_fwd.c index c5c4d5d9030..bdb7e145bf2 100644 --- a/orte/mca/regx/fwd/regx_fwd.c +++ b/orte/mca/regx/fwd/regx_fwd.c @@ -296,5 +296,9 @@ static int nidmap_create(opal_pointer_array_t *pool, char **regex) free(nodenames); free(tmp); *regex = tmp2; + opal_output_verbose(5, orte_regx_base_framework.framework_output, + "%s Final regex: <%s>", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + *regex); return ORTE_SUCCESS; } diff --git a/orte/mca/regx/naive/Makefile.am b/orte/mca/regx/naive/Makefile.am new file mode 100644 index 00000000000..0cb7fea20d6 --- /dev/null +++ 
b/orte/mca/regx/naive/Makefile.am @@ -0,0 +1,36 @@ +# +# Copyright (c) 2016-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2019 IBM Corporation. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + regx_naive_component.c \ + regx_naive.h \ + regx_naive.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if MCA_BUILD_orte_regx_naive_DSO +component_noinst = +component_install = mca_regx_naive.la +else +component_noinst = libmca_regx_naive.la +component_install = +endif + +mcacomponentdir = $(ortelibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_regx_naive_la_SOURCES = $(sources) +mca_regx_naive_la_LDFLAGS = -module -avoid-version +mca_regx_naive_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la + +noinst_LTLIBRARIES = $(component_noinst) +libmca_regx_naive_la_SOURCES = $(sources) +libmca_regx_naive_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/regx/naive/owner.txt b/orte/mca/regx/naive/owner.txt new file mode 100644 index 00000000000..2fd247dddb1 --- /dev/null +++ b/orte/mca/regx/naive/owner.txt @@ -0,0 +1,7 @@ +# +# owner/status file +# owner: institution that is responsible for this package +# status: e.g. active, maintenance, unmaintained +# +owner: IBM +status: active diff --git a/orte/mca/regx/naive/regx_naive.c b/orte/mca/regx/naive/regx_naive.c new file mode 100644 index 00000000000..710b70fb08d --- /dev/null +++ b/orte/mca/regx/naive/regx_naive.c @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/types.h" +#include "opal/types.h" + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <ctype.h> + +#include "opal/util/argv.h" +#include "opal/util/basename.h" +#include "opal/util/opal_environ.h" + +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" +#include "orte/util/show_help.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rmaps/base/base.h" +#include "orte/mca/routed/routed.h" +#include "orte/mca/regx/base/base.h" + +#include "regx_naive.h" + +static int nidmap_create(opal_pointer_array_t *pool, char **regex); + +orte_regx_base_module_t orte_regx_naive_module = { + .nidmap_create = nidmap_create, + .nidmap_parse = orte_regx_base_nidmap_parse, + .extract_node_names = orte_regx_base_extract_node_names, + .encode_nodemap = orte_regx_base_encode_nodemap, + .decode_daemon_nodemap = orte_regx_base_decode_daemon_nodemap, + .generate_ppn = orte_regx_base_generate_ppn, + .parse_ppn = orte_regx_base_parse_ppn +}; + +static int nidmap_create(opal_pointer_array_t *pool, char **regex) +{ + char *node; + int n; + char *nodenames; + orte_regex_range_t *rng; + opal_list_t dvpids; + opal_list_item_t *item; + char **regexargs = NULL, **vpidargs = NULL, *tmp, *tmp2; + orte_node_t *nptr; + orte_vpid_t vpid; + + if (mca_regx_naive_component.compress_vpids) { + OBJ_CONSTRUCT(&dvpids, opal_list_t); + } + + rng = NULL; + for (n=0; n < pool->size; n++) { + if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(pool, n))) { + continue; + } + /* if no daemon has been assigned, then this node is not being used */ + if (NULL == nptr->daemon) { + vpid = -1; // indicates no daemon assigned + } else { + vpid = nptr->daemon->name.vpid; + } + + if (mca_regx_naive_component.compress_vpids) { + /* deal with the daemon vpid - see if it is next in the + * current range */ + if (NULL == rng) { + /* just starting */ + rng =
OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); + } else if (UINT32_MAX == vpid) { + if (-1 == rng->vpid) { + rng->cnt++; + } else { + /* need to start another range */ + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); + } + } else if (-1 == rng->vpid) { + /* need to start another range */ + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); + } else { + /* is this the next in line */ + if (vpid == (orte_vpid_t)(rng->vpid + rng->cnt)) { + rng->cnt++; + } else { + /* need to start another range */ + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); + } + } + } + else { + asprintf(&tmp, "%u", vpid); + opal_argv_append_nosize(&vpidargs, tmp); + free(tmp); + } + + node = nptr->name; + opal_output_verbose(5, orte_regx_base_framework.framework_output, + "%s PROCESS NODE <%s>", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + node); + + /* Don't compress the name - just add it to the list */ + if (NULL != node) { + /* solitary node */ + opal_argv_append_nosize(&regexargs, node); + } + } + + /* assemble final result */ + nodenames = opal_argv_join(regexargs, ','); + /* cleanup */ + opal_argv_free(regexargs); + + if (mca_regx_naive_component.compress_vpids) { + /* do the same for the vpids */ + tmp = NULL; + while (NULL != (item = opal_list_remove_first(&dvpids))) { + rng = (orte_regex_range_t*)item; + if (1 < rng->cnt) { + if (NULL == tmp) { + asprintf(&tmp, "%u(%u)", rng->vpid, rng->cnt); + } else { + asprintf(&tmp2, "%s,%u(%u)", tmp, rng->vpid, rng->cnt); + free(tmp); + tmp = tmp2; + } + } else { + if (NULL == tmp) { + asprintf(&tmp, "%u", rng->vpid); + } else { + asprintf(&tmp2, "%s,%u", tmp, rng->vpid); + free(tmp); + tmp = tmp2; + } + } + OBJ_RELEASE(rng); + } + OPAL_LIST_DESTRUCT(&dvpids); + } + else { + tmp = opal_argv_join(vpidargs,
','); + /* cleanup */ + opal_argv_free(vpidargs); + } + + /* now concatenate the results into one string */ + asprintf(&tmp2, "%s@%s", nodenames, tmp); + free(nodenames); + free(tmp); + *regex = tmp2; + opal_output_verbose(5, orte_regx_base_framework.framework_output, + "%s Final regex: <%s>", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + *regex); + return ORTE_SUCCESS; +} diff --git a/orte/mca/regx/naive/regx_naive.h b/orte/mca/regx/naive/regx_naive.h new file mode 100644 index 00000000000..012dfa25680 --- /dev/null +++ b/orte/mca/regx/naive/regx_naive.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef _MCA_REGX_NONE_H_ +#define _MCA_REGX_NONE_H_ + +#include "orte_config.h" + +#include "orte/types.h" + +#include "opal/mca/base/base.h" +#include "orte/mca/regx/regx.h" + + +BEGIN_C_DECLS + +struct orte_regx_naive_component_t { + orte_regx_base_component_t super; + bool compress_vpids; +}; +typedef struct orte_regx_naive_component_t orte_regx_naive_component_t; + +ORTE_MODULE_DECLSPEC extern orte_regx_naive_component_t mca_regx_naive_component; +extern orte_regx_base_module_t orte_regx_naive_module; + +END_C_DECLS + +#endif /* MCA_REGX_ORTE_H_ */ diff --git a/orte/mca/regx/naive/regx_naive_component.c b/orte/mca/regx/naive/regx_naive_component.c new file mode 100644 index 00000000000..b29641f194f --- /dev/null +++ b/orte/mca/regx/naive/regx_naive_component.c @@ -0,0 +1,62 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/types.h" +#include "opal/types.h" + +#include "opal/util/show_help.h" + +#include "orte/mca/regx/regx.h" +#include "regx_naive.h" + +static int component_query(mca_base_module_t **module, int *priority); +static int component_register(void); + +/* + * Struct of function pointers and all that to let us be initialized + */ +orte_regx_naive_component_t mca_regx_naive_component = { + { + .base_version = { + MCA_REGX_BASE_VERSION_1_0_0, + .mca_component_name = "naive", + MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, + ORTE_RELEASE_VERSION), + .mca_query_component = component_query, + .mca_register_component_params = component_register, + }, + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + } +}; + +static int component_query(mca_base_module_t **module, int *priority) +{ + *module = (mca_base_module_t*)&orte_regx_naive_module; + *priority = 1; + return ORTE_SUCCESS; +} + +static int component_register(void) +{ + mca_base_component_t *c = &mca_regx_naive_component.super.base_version; + + mca_regx_naive_component.compress_vpids = false; + (void) mca_base_component_var_register (c, "compress_vpids", "Enable compression of vpids (default: false)", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_regx_naive_component.compress_vpids); + + return ORTE_SUCCESS; +} diff --git a/orte/mca/regx/reverse/regx_reverse.c b/orte/mca/regx/reverse/regx_reverse.c index b6c3ba187a0..1437f48ad75 100644 --- a/orte/mca/regx/reverse/regx_reverse.c +++ b/orte/mca/regx/reverse/regx_reverse.c @@ -315,5 +315,9 @@ static int nidmap_create(opal_pointer_array_t *pool, char **regex) free(nodenames); free(tmp); *regex = tmp2; + opal_output_verbose(5, orte_regx_base_framework.framework_output, + "%s Final regex: <%s>", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + 
*regex); return ORTE_SUCCESS; } From 197607c896dfe5260c8015a97fb50484365638f9 Mon Sep 17 00:00:00 2001 From: Geoffrey Paulsen Date: Sat, 24 Aug 2019 16:42:36 -0500 Subject: [PATCH 78/94] Updating NEWS for v4.0.2 Signed-off-by: Geoffrey Paulsen --- NEWS | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index c7fcd83b079..a4374b10d4e 100644 --- a/NEWS +++ b/NEWS @@ -14,7 +14,7 @@ Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. Use is subject to license terms. Copyright (c) 2006-2018 Los Alamos National Security, LLC. All rights reserved. -Copyright (c) 2010-2018 IBM Corporation. All rights reserved. +Copyright (c) 2010-2019 IBM Corporation. All rights reserved. Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. Copyright (c) 2012 Sandia National Laboratories. All rights reserved. Copyright (c) 2012 University of Houston. All rights reserved. @@ -57,6 +57,51 @@ included in the vX.Y.Z section and be denoted as: (** also appeared: A.B.C) -- indicating that this item was previously included in release version vA.B.C. +4.0.2 -- September, 2019 +------------------------ +- Update embedded PMIx to 3.1.4 +- Fix an issue with env processing when running as root. + Thanks to Simon Byrne for reporting and providing a fix. +- Fix Fortran MPI_FILE_GET_POSITION return code bug. + Thanks to Wei-Keng Liao for reporting. +- Fix user defined datatypes/ops leak in nonblocking base collective + component. Thanks to Andrey Maslennikov for verifying fix. +- Fixed shared memory not working with spawned processes. + Thanks to @rodarima for reporting. +- Fix data corruption of overlapping datatypes on sends. + Thanks to DKRZ for reporting. +- Fix segfault in oob_tcp component on close with active listeners. + Thanks to Orivej Desh for reporting and providing a fix. +- Fix divide by zero segfault in ompio. + Thanks to @haraldkl for reporting and providing a fix. 
+- Fix finalize of flux components. + Thanks to Stephen Herbein and Jim Garlick for providing a fix. +- Fix osc_rdma_acc_single_intrinsic regression. + Thanks to Joseph Schuchart for reporting and providing a fix. +- Fix hostnames with large integers. + Thanks to @perrynzhou for reporting and providing a fix. +- Fix Deadlock in MPI_Fetch_and_op when using UCX + Thanks to Joseph Schuchart for reporting. +- Fix the SLURM plm for mpirun-based launching. + Thanks to Jordon Hayes for reporting and providing a fix. +- Prevent grep failure in rpmbuild from aborting. + Thanks to Daniel Letai for reporting. +- Fix btl/vader finalize sequence. + Thanks to Daniel Vollmer for reporting. +- Fix pml/ob1 local handle sent during PUT control message. + Thanks to @EmmanuelBRELLE for reporting and providing a fix. +- Fix Memory leak with persistent MPI sends and the ob1 "get" protocol. + Thanks to @s-kuberski for reporting. +- v4.0.x: mpi: mark MPI_COMBINER_{HVECTOR,HINDEXED,STRUCT}_INTEGER + removed unless configured with --enable-mpi1-compatibility +- Fix make-authors.pl when run in a git submodule. + Thanks to Michael Heinz for reporting and providing a fix. +- Fix deadlock with mpi_assert_allow_overtaking in MPI_Issend. + Thanks to Joseph Schuchart and George Bosilca for reporting. +- Add compilation flag to allow unwinding through files that are + present in the stack when attaching with MPIR. + Thanks to James A Clark for reporting and providing a fix.
+ 4.0.1 -- March, 2019 -------------------- From 8efc6e1dc15eb19afea48a53f09830ce1328ad10 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 26 Aug 2019 23:48:34 -0700 Subject: [PATCH 79/94] Remove unnecessary error log Refs https://github.com/pmix/pmix/pull/1413 Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_get.c | 1 - 1 file changed, 1 deletion(-) diff --git a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_get.c index 48cee715afe..b620c05cf29 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix3x/pmix/src/client/pmix_client_get.c @@ -331,7 +331,6 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, } if (PMIX_SUCCESS != ret) { - PMIX_ERROR_LOG(ret); goto done; } if (PMIX_RANK_UNDEF == proc.rank) { From 83a251899482398a33e0d79240a48b541e937ec2 Mon Sep 17 00:00:00 2001 From: Valentin Petrov Date: Tue, 27 Aug 2019 15:44:26 +0300 Subject: [PATCH 80/94] Coll/hcoll: fixes hcoll non-blocking colls support open-mpi/ompi@0fe756d4166eecf2f0ee2598da690c69a7c824c4 Introduced a bug in coll/hcoll component. The ompi_requests allocated by libhcoll would be treated as coll_base_nbc_request during ompi_coll_base_retain_<> call. Afterwards this would lead to a segv in the request cleanup. Fix: since libhcoll interface does not distinguish between the blocking/non-blocking requests use coll_base_nbc_request all the time and initialize it properly in coll/hcoll/get_coll_handle(). It is still within 2 cache lines.
Signed-off-by: Valentin Petrov --- ompi/mca/coll/hcoll/coll_hcoll_rte.c | 47 +++++++++++----------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/ompi/mca/coll/hcoll/coll_hcoll_rte.c b/ompi/mca/coll/hcoll/coll_hcoll_rte.c index 6df2dde7e90..b7b87d0f41a 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_rte.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_rte.c @@ -39,7 +39,7 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/datatype/ompi_datatype_internal.h" #include "ompi/mca/pml/pml.h" - +#include "ompi/mca/coll/base/coll_base_util.h" #include "hcoll/api/hcoll_dte.h" #include "hcoll/api/hcoll_api.h" @@ -151,25 +151,13 @@ void hcoll_rte_fns_setup(void) { init_module_fns(); OBJ_CONSTRUCT(&mca_coll_hcoll_component.requests, opal_free_list_t); - opal_free_list_init( - &(mca_coll_hcoll_component.requests), - sizeof(ompi_request_t), - /* no special alignment needed */ - 8, - OBJ_CLASS(ompi_request_t), - /* no payload data */ - 0, 0, - /* NOTE: hack - need to parametrize this */ - 10, - -1, - 10, - /* No Mpool or init function */ - NULL, - 0, - NULL, - NULL, - NULL - ); + opal_free_list_init(&(mca_coll_hcoll_component.requests), + sizeof(ompi_coll_base_nbc_request_t), + opal_cache_line_size, OBJ_CLASS(ompi_coll_base_nbc_request_t), + /* no payload data */ + 0, 0, 10, -1, 10, + /* No Mpool or init function */ + NULL, 0, NULL, NULL, NULL); } static int recv_nb(struct dte_data_representation_t data, @@ -349,20 +337,23 @@ request_free(struct ompi_request_t **ompi_req) static void* get_coll_handle(void) { - ompi_request_t *ompi_req; + ompi_coll_base_nbc_request_t *ompi_req; opal_free_list_item_t *item; item = opal_free_list_wait (&(mca_coll_hcoll_component.requests)); if (OPAL_UNLIKELY(NULL == item)) { HCOL_ERROR("Wait for free list failed.\n"); return NULL; } - ompi_req = (ompi_request_t *)item; - OMPI_REQUEST_INIT(ompi_req,false); - ompi_req->req_complete_cb = NULL; - ompi_req->req_status.MPI_ERROR = MPI_SUCCESS; - ompi_req->req_state = OMPI_REQUEST_ACTIVE; 
- ompi_req->req_free = request_free; - ompi_req->req_type = OMPI_REQUEST_COLL; + ompi_req = (ompi_coll_base_nbc_request_t *)item; + OMPI_REQUEST_INIT(&ompi_req->super,false); + ompi_req->super.req_complete_cb = NULL; + ompi_req->super.req_complete_cb_data = NULL; + ompi_req->super.req_status.MPI_ERROR = MPI_SUCCESS; + ompi_req->super.req_state = OMPI_REQUEST_ACTIVE; + ompi_req->super.req_free = request_free; + ompi_req->super.req_type = OMPI_REQUEST_COLL; + ompi_req->data.objs.objs[0] = NULL; + ompi_req->data.objs.objs[1] = NULL; return (void *)ompi_req; } From 8b3fd5682ff6b54c71b6513a9ba74a537078faec Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Tue, 27 Aug 2019 11:55:03 -0700 Subject: [PATCH 81/94] Update OPAL DDT variable names These variables were renamed in 904276bb44caec207638247f23139bc21bc6a09e; update them to use the new names. Signed-off-by: Jeff Squyres (cherry picked from commit 2ab8109be15a7739caa72ec8f863e8e01c2c9a0f) --- test/datatype/position.c | 12 ++++++------ test/datatype/position_noncontig.c | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/test/datatype/position.c b/test/datatype/position.c index 07725816e43..0be4f77fd85 100644 --- a/test/datatype/position.c +++ b/test/datatype/position.c @@ -197,9 +197,9 @@ static void dump_ldi( ddt_ldi_t* buffer, int start_pos, int end_pos ) #endif #if (OPAL_ENABLE_DEBUG == 1) && (OPAL_C_HAVE_VISIBILITY == 0) -extern bool opal_unpack_debug; -extern bool opal_pack_debug; -extern bool opal_position_debug ; +extern bool opal_ddt_unpack_debug; +extern bool opal_ddt_pack_debug; +extern bool opal_ddt_position_debug ; #endif /* OPAL_ENABLE_DEBUG */ static char* bytes_dump( void* src, size_t cnt ) @@ -235,9 +235,9 @@ int main( int argc, char* argv[] ) ompi_datatype_init(); #if (OPAL_ENABLE_DEBUG == 1) && (OPAL_C_HAVE_VISIBILITY == 0) - opal_unpack_debug = false; - opal_pack_debug = false; - opal_position_debug = false; + opal_ddt_unpack_debug = false; + opal_ddt_pack_debug = 
false; + opal_ddt_position_debug = false; #endif /* OPAL_ENABLE_DEBUG */ create_segments( datatype, data_count, fragment_size, diff --git a/test/datatype/position_noncontig.c b/test/datatype/position_noncontig.c index 0fb94c224ab..7058c6d17ba 100644 --- a/test/datatype/position_noncontig.c +++ b/test/datatype/position_noncontig.c @@ -178,9 +178,9 @@ unpack_segments( ompi_datatype_t* datatype, int count, } #if (OPAL_ENABLE_DEBUG == 1) && (OPAL_C_HAVE_VISIBILITY == 0) -extern bool opal_unpack_debug; -extern bool opal_pack_debug; -extern bool opal_position_debug ; +extern bool opal_ddt_unpack_debug; +extern bool opal_ddt_pack_debug; +extern bool opal_ddt_position_debug ; #endif /* OPAL_ENABLE_DEBUG */ int main( int argc, char* argv[] ) @@ -206,9 +206,9 @@ int main( int argc, char* argv[] ) ompi_datatype_commit(&datatype); #if (OPAL_ENABLE_DEBUG == 1) && (OPAL_C_HAVE_VISIBILITY == 0) - opal_unpack_debug = false; - opal_pack_debug = false; - opal_position_debug = false; + opal_ddt_unpack_debug = false; + opal_ddt_pack_debug = false; + opal_ddt_position_debug = false; #endif /* OPAL_ENABLE_DEBUG */ create_segments( datatype, 1, fragment_size, From 8d130e1964e4278efaf3bc8bc56e101b29a6a790 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Tue, 27 Aug 2019 15:43:15 +0200 Subject: [PATCH 82/94] UCX osc: properly release exclusive lock to avoid lockup Signed-off-by: Joseph Schuchart (cherry picked from commit 08cb6389e034c1a70368671f745f20904c774a1e) --- ompi/mca/osc/ucx/osc_ucx_passive_target.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/ompi/mca/osc/ucx/osc_ucx_passive_target.c b/ompi/mca/osc/ucx/osc_ucx_passive_target.c index 38a7dccb273..89920a29918 100644 --- a/ompi/mca/osc/ucx/osc_ucx_passive_target.c +++ b/ompi/mca/osc/ucx/osc_ucx_passive_target.c @@ -90,21 +90,19 @@ static inline int start_exclusive(ompi_osc_ucx_module_t *module, int target) { } static inline int end_exclusive(ompi_osc_ucx_module_t *module, int target) { - 
uint64_t result_value = 0; ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target); ucp_rkey_h rkey = (module->state_info_array)[target].rkey; uint64_t remote_addr = (module->state_info_array)[target].addr + OSC_UCX_STATE_LOCK_OFFSET; - int ret; + ucs_status_t status; - ret = opal_common_ucx_atomic_fetch(ep, UCP_ATOMIC_FETCH_OP_SWAP, TARGET_LOCK_UNLOCKED, - &result_value, sizeof(result_value), - remote_addr, rkey, mca_osc_ucx_component.ucp_worker); - if (OMPI_SUCCESS != ret) { - return ret; + status = ucp_atomic_post(ep, UCP_ATOMIC_POST_OP_ADD, + -((int64_t)TARGET_LOCK_EXCLUSIVE), sizeof(uint64_t), + remote_addr, rkey); + if (UCS_OK != status) { + OSC_UCX_VERBOSE(1, "ucp_atomic_post(OP_ADD) failed: %d", status); + return OMPI_ERROR; } - assert(result_value >= TARGET_LOCK_EXCLUSIVE); - return OMPI_SUCCESS; } From 4c1160e257c945d095c1985671b8a478fc45ca6a Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Thu, 29 Aug 2019 16:26:43 -0400 Subject: [PATCH 83/94] Fix tree spawn routed component issue * Fix #6618 - See comments on Issue #6618 for finer details. * The `plm/rsh` component uses the highest priority `routed` component to construct the launch tree. The remote orted's will activate all available `routed` components when updating routes. This allows the opportunity for the parent vpid on the remote `orted` to not match that which was expected in the tree launch. The result is that the remote orted tries to contact their parent with the wrong contact information and orted wireup will fail. * This fix forces the orteds to use the same `routed` component as the HNP used when constructing the tree, if tree launch is enabled. 
Signed-off-by: Joshua Hursey --- orte/mca/plm/rsh/plm_rsh_module.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/orte/mca/plm/rsh/plm_rsh_module.c b/orte/mca/plm/rsh/plm_rsh_module.c index 7e34de9ecfe..d022fe51c29 100644 --- a/orte/mca/plm/rsh/plm_rsh_module.c +++ b/orte/mca/plm/rsh/plm_rsh_module.c @@ -343,11 +343,12 @@ static int setup_launch(int *argcptr, char ***argvptr, char *orted_cmd, *orted_prefix, *final_cmd; int orted_index; int rc; - int i, j; + int i, j, cnt; bool found; char *lib_base=NULL, *bin_base=NULL; char *opal_prefix = getenv("OPAL_PREFIX"); char* full_orted_cmd = NULL; + char * rtmod; /* Figure out the basenames for the libdir and bindir. This requires some explanation: @@ -609,6 +610,18 @@ static int setup_launch(int *argcptr, char ***argvptr, (mca_plm_rsh_component.using_llspawn && mca_plm_rsh_component.daemonize_llspawn))) { } + if (!mca_plm_rsh_component.no_tree_spawn) { + // Remove problematic and/or conflicting command line arguments that + // should not be passed on to our children. + cnt = opal_argv_count(orted_cmd_line); + for (i=0; i < cnt; i+=3) { + if (0 == strcmp(orted_cmd_line[i+1], "routed")) { + opal_argv_delete(&cnt, &orted_cmd_line, i, 3); + break; + } + } + } + /* * Add the basic arguments to the orted command line, including * all debug options @@ -627,6 +640,16 @@ static int setup_launch(int *argcptr, char ***argvptr, if (!mca_plm_rsh_component.no_tree_spawn) { opal_argv_append(&argc, &argv, "--tree-spawn"); orte_oob_base_get_addr(&param); + + // When tree-spawn'ing we need to force the remote daemons to use + // the routing component that was used to setup the launch tree. + // Otherwise the orte_parent_uri will not match the orted they + // expect to find in the routing tree. 
+ rtmod = orte_rml.get_routed(orte_coll_conduit); + opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID); + opal_argv_append(&argc, &argv, "routed"); + opal_argv_append(&argc, &argv, rtmod); + opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID); opal_argv_append(&argc, &argv, "orte_parent_uri"); opal_argv_append(&argc, &argv, param); @@ -1187,6 +1210,10 @@ static void launch_daemons(int fd, short args, void *cbdata) OBJ_CONSTRUCT(&coll, opal_list_t); rtmod = orte_rml.get_routed(orte_coll_conduit); orte_routed.get_routing_list(rtmod, &coll); + + OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, + "%s plm:rsh:launch Tree Launch using routed/%s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rtmod)); } /* setup the launch */ From fbbacc1303f02b4c955975911b82e8755ea0721c Mon Sep 17 00:00:00 2001 From: Harumi Kuno Date: Tue, 30 Jul 2019 06:32:27 -0700 Subject: [PATCH 84/94] Fix mmap infinite recurse in memory patcher This commit fixes issue #6853 by removing MacOS/Darwin-specific logic from intercept_mmap. 
Signed-off-by: Harumi Kuno --- opal/mca/memory/patcher/configure.m4 | 4 ---- opal/mca/memory/patcher/memory_patcher_component.c | 5 ----- 2 files changed, 9 deletions(-) diff --git a/opal/mca/memory/patcher/configure.m4 b/opal/mca/memory/patcher/configure.m4 index 6881ec69366..0c5d8553259 100644 --- a/opal/mca/memory/patcher/configure.m4 +++ b/opal/mca/memory/patcher/configure.m4 @@ -40,10 +40,6 @@ AC_DEFUN([MCA_opal_memory_patcher_CONFIG],[ AC_CHECK_HEADERS([linux/mman.h sys/syscall.h]) - AC_CHECK_DECLS([__mmap], [], [], [#include ]) - - AC_CHECK_FUNCS([__mmap]) - AC_CHECK_DECLS([__syscall], [], [], [#include ]) AC_CHECK_FUNCS([__syscall]) diff --git a/opal/mca/memory/patcher/memory_patcher_component.c b/opal/mca/memory/patcher/memory_patcher_component.c index 5db3a6016f8..687d430fa36 100644 --- a/opal/mca/memory/patcher/memory_patcher_component.c +++ b/opal/mca/memory/patcher/memory_patcher_component.c @@ -125,12 +125,7 @@ static void *_intercept_mmap(void *start, size_t length, int prot, int flags, in } if (!original_mmap) { -#ifdef HAVE___MMAP - /* the darwin syscall returns an int not a long so call the underlying __mmap function */ - result = __mmap (start, length, prot, flags, fd, offset); -#else result = (void*)(intptr_t) memory_patcher_syscall(SYS_mmap, start, length, prot, flags, fd, offset); -#endif } else { result = original_mmap (start, length, prot, flags, fd, offset); } From 9bf1873215d318d1cb1b581def966125ff9c1b54 Mon Sep 17 00:00:00 2001 From: guserav Date: Tue, 9 Jul 2019 13:39:15 -0700 Subject: [PATCH 85/94] Fix osc sm posts when only 32 bit atomics support Signed-off-by: guserav (cherry picked from commit 3c9f4e682369e6fd5860b46ba81d79f2d1599a35) --- ompi/mca/osc/sm/osc_sm_active_target.c | 6 +++--- ompi/mca/osc/sm/osc_sm_component.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ompi/mca/osc/sm/osc_sm_active_target.c b/ompi/mca/osc/sm/osc_sm_active_target.c index ab0f73f87c6..e34389be6c7 100644 --- 
a/ompi/mca/osc/sm/osc_sm_active_target.c +++ b/ompi/mca/osc/sm/osc_sm_active_target.c @@ -151,7 +151,7 @@ ompi_osc_sm_start(struct ompi_group_t *group, for (int i = 0 ; i < size ; ++i) { int rank_byte = ranks[i] >> OSC_SM_POST_BITS; - osc_sm_post_type_t rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & 0x3f); + osc_sm_post_type_t rank_bit = ((osc_sm_post_type_t) 1) << (ranks[i] & OSC_SM_POST_MASK); /* wait for rank to post */ while (!(module->posts[my_rank][rank_byte] & rank_bit)) { @@ -221,8 +221,8 @@ ompi_osc_sm_post(struct ompi_group_t *group, ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) win->w_osc_module; int my_rank = ompi_comm_rank (module->comm); - int my_byte = my_rank >> 6; - uint64_t my_bit = ((uint64_t) 1) << (my_rank & 0x3f); + int my_byte = my_rank >> OSC_SM_POST_BITS; + osc_sm_post_type_t my_bit = ((osc_sm_post_type_t) 1) << (my_rank & OSC_SM_POST_MASK); int gsize; OPAL_THREAD_LOCK(&module->lock); diff --git a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index de891e71a11..0475e65266b 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ b/ompi/mca/osc/sm/osc_sm_component.c @@ -242,7 +242,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit int i, flag; size_t pagesize; size_t state_size; - size_t posts_size, post_size = (comm_size + 63) / 64; + size_t posts_size, post_size = (comm_size + OSC_SM_POST_MASK) / (OSC_SM_POST_MASK + 1); OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output, "allocating shared memory region of size %ld\n", (long) size)); From e2b154327e09e3aa0029463b6d20a90632ab680a Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Thu, 29 Aug 2019 18:04:46 -0400 Subject: [PATCH 86/94] Small optimization on the datatype commit. This patch fixes the merge of contiguous elements into larger but more compact datatypes, and allows for contiguous elements to have their blocklen increasing instead of the count. The idea is to always maximize the blocklen, aka. 
the contiguous part of the datatype. Signed-off-by: George Bosilca (cherry picked from commit 41e6f55807b01ad5c04e8387a3699cf743931f6a) --- opal/datatype/opal_datatype_optimize.c | 26 ++++++++++++++++---------- opal/datatype/opal_datatype_pack.c | 1 + 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/opal/datatype/opal_datatype_optimize.c b/opal/datatype/opal_datatype_optimize.c index 336e11f0560..2e661b95daa 100644 --- a/opal/datatype/opal_datatype_optimize.c +++ b/opal/datatype/opal_datatype_optimize.c @@ -163,6 +163,12 @@ opal_datatype_optimize_short( opal_datatype_t* pData, if( 0 == last.count ) { /* first data of the datatype */ last = *current; continue; /* next data */ + } else { /* can we merge it in order to decrease count */ + if( (ptrdiff_t)last.blocklen * (ptrdiff_t)opal_datatype_basicDatatypes[last.common.type]->size == last.extent ) { + last.extent *= last.count; + last.blocklen *= last.count; + last.count = 1; + } } /* are the two elements compatible: aka they have very similar values and they @@ -176,27 +182,27 @@ opal_datatype_optimize_short( opal_datatype_t* pData, last.common.type = OPAL_DATATYPE_UINT1; } - if( 1 == last.count ) { - /* we can ignore the extent of the element with count == 1 and merge them together if their displacements match */ + if( (last.extent * (ptrdiff_t)last.count + last.disp) == current->disp ) { if( 1 == current->count ) { - last.extent = current->disp - last.disp; last.count++; continue; } - /* can we compute a matching displacement ? 
*/ - if( (last.disp + current->extent) == current->disp ) { - last.extent = current->extent; - last.count = current->count + 1; + if( last.extent == current->extent ) { + last.count += current->count; continue; } } - if( (last.extent * (ptrdiff_t)last.count + last.disp) == current->disp ) { + if( 1 == last.count ) { + /* we can ignore the extent of the element with count == 1 and merge them together if their displacements match */ if( 1 == current->count ) { + last.extent = current->disp - last.disp; last.count++; continue; } - if( last.extent == current->extent ) { - last.count += current->count; + /* can we compute a matching displacement ? */ + if( (last.disp + current->extent) == current->disp ) { + last.extent = current->extent; + last.count = current->count + last.count; continue; } } diff --git a/opal/datatype/opal_datatype_pack.c b/opal/datatype/opal_datatype_pack.c index f21adcccb34..6dc0b81a253 100644 --- a/opal/datatype/opal_datatype_pack.c +++ b/opal/datatype/opal_datatype_pack.c @@ -121,6 +121,7 @@ opal_pack_homogeneous_contig_with_gaps_function( opal_convertor_t* pConv, * how much we should jump between elements. */ assert( (pData->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && ((ptrdiff_t)pData->size != extent) ); + assert( pData->opt_desc.used <= 1 ); DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( pBaseBuf %p, iov_count %d )\n", (void*)pConv->pBaseBuf, *out_size ); ); if( stack[1].type != opal_datatype_uint1.id ) { From 8f16780ee023d25055d04300d3b16f3f1c9d808a Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Fri, 30 Aug 2019 19:55:41 -0400 Subject: [PATCH 87/94] Add a test for datatypes composed by multiple predefined elements that can be merged into a larger UINT1 type. 
Signed-off-by: George Bosilca (cherry picked from commit 82d632278ae5ab4337984d5ef4793f818c4dd437) --- test/datatype/to_self.c | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/test/datatype/to_self.c b/test/datatype/to_self.c index 073fe4f0b57..2ba3eb92163 100644 --- a/test/datatype/to_self.c +++ b/test/datatype/to_self.c @@ -24,6 +24,26 @@ extern void ompi_datatype_dump( MPI_Datatype ddt ); #define MPI_DDT_DUMP(ddt) #endif /* OPEN_MPI */ +static MPI_Datatype +create_merged_contig_with_gaps(int count) /* count of the basic datatype */ +{ + int array_of_blocklengths[] = {1, 1, 1}; + MPI_Aint array_of_displacements[] = {0, 8, 16}; + MPI_Datatype array_of_types[] = {MPI_DOUBLE, MPI_LONG, MPI_CHAR}; + MPI_Datatype type; + + MPI_Type_create_struct(3, array_of_blocklengths, + array_of_displacements, array_of_types, + &type); + if( 1 < count ) { + MPI_Datatype temp = type; + MPI_Type_contiguous(count, temp, &type); + } + MPI_Type_commit(&type); + MPI_DDT_DUMP( type ); + return type; +} + /* Create a non-contiguous resized datatype */ struct structure { double not_transfered; @@ -183,11 +203,12 @@ create_indexed_gap_optimized_ddt( void ) /******************************************************************** *******************************************************************/ -#define DO_CONTIG 0x00000001 -#define DO_CONSTANT_GAP 0x00000002 -#define DO_INDEXED_GAP 0x00000004 -#define DO_OPTIMIZED_INDEXED_GAP 0x00000008 -#define DO_STRUCT_CONSTANT_GAP_RESIZED 0x00000010 +#define DO_CONTIG 0x00000001 +#define DO_CONSTANT_GAP 0x00000002 +#define DO_INDEXED_GAP 0x00000004 +#define DO_OPTIMIZED_INDEXED_GAP 0x00000008 +#define DO_STRUCT_CONSTANT_GAP_RESIZED 0x00000010 +#define DO_STRUCT_MERGED_WITH_GAP_RESIZED 0x00000020 #define DO_PACK 0x01000000 #define DO_UNPACK 0x02000000 @@ -483,7 +504,7 @@ static int do_test_for_ddt( int doop, MPI_Datatype sddt, MPI_Datatype rddt, int int main( int argc, char* argv[] ) { - int 
run_tests = 0xffff; /* do all datatype tests by default */ + int run_tests = DO_STRUCT_MERGED_WITH_GAP_RESIZED; /* do all datatype tests by default */ int rank, size; MPI_Datatype ddt; @@ -544,6 +565,14 @@ int main( int argc, char* argv[] ) MPI_Type_free( &ddt ); } + if( run_tests & DO_STRUCT_MERGED_WITH_GAP_RESIZED ) { + printf( "\nstruct constant gap resized\n\n" ); + ddt = create_merged_contig_with_gaps( 1 ); + MPI_DDT_DUMP( ddt ); + do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH ); + MPI_Type_free( &ddt ); + } + MPI_Finalize (); exit(0); } From 95cc53e331b13477749802a853b44794b1050d03 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 6 Sep 2019 08:27:05 -0700 Subject: [PATCH 88/94] Be a little less restrictive on interface requirements If both types of interfaces are enabled, don't error out if one of them isn't able to open listener sockets. Only one interface family may be available on some machines, but someone might want to build the code to run more generally. Refs https://github.com/pmix/prrte/pull/249 Signed-off-by: Ralph Castain (cherry picked from commit 06d188ebf3646760f50d4513361b50642af9cec4) --- orte/mca/oob/tcp/help-oob-tcp.txt | 6 +++++- orte/mca/oob/tcp/oob_tcp_listener.c | 20 ++++++++++---------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/orte/mca/oob/tcp/help-oob-tcp.txt b/orte/mca/oob/tcp/help-oob-tcp.txt index e5562ac4708..8af0589032f 100644 --- a/orte/mca/oob/tcp/help-oob-tcp.txt +++ b/orte/mca/oob/tcp/help-oob-tcp.txt @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2014-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. # Copyright (c) 2015-2018 Cisco Systems, Inc. All rights reserved # $COPYRIGHT$ # @@ -132,3 +132,7 @@ up aborting your job. 
Peer host: %s Peer process name: %s Peer Open MPI version: %s +# +[no-listeners] +No sockets were able to be opened on the available protocols +(IPv4 and/or IPv6). Please check your network and retry. diff --git a/orte/mca/oob/tcp/oob_tcp_listener.c b/orte/mca/oob/tcp/oob_tcp_listener.c index e97103c07f1..d34d1a2b18b 100644 --- a/orte/mca/oob/tcp/oob_tcp_listener.c +++ b/orte/mca/oob/tcp/oob_tcp_listener.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -98,7 +98,7 @@ static void connection_event_handler(int sd, short flags, void* cbdata); */ int orte_oob_tcp_start_listening(void) { - int rc; + int rc = ORTE_SUCCESS, rc2 = ORTE_SUCCESS; mca_oob_tcp_listener_t *listener; /* if we don't have any TCP interfaces, we shouldn't be here */ @@ -112,19 +112,19 @@ int orte_oob_tcp_start_listening(void) } /* create listen socket(s) for incoming connection attempts */ - if (ORTE_SUCCESS != (rc = create_listen())) { - ORTE_ERROR_LOG(rc); - return rc; - } + rc = create_listen(); #if OPAL_ENABLE_IPV6 /* create listen socket(s) for incoming connection attempts */ - if (ORTE_SUCCESS != (rc = create_listen6())) { - ORTE_ERROR_LOG(rc); - return rc; - } + rc2 = create_listen6(); #endif + if (ORTE_SUCCESS != rc && ORTE_SUCCESS != rc2) { + /* we were unable to open any listening sockets */ + opal_show_help("help-oob-tcp.txt", "no-listeners", true); + return ORTE_ERR_FATAL; + } + /* if I am the HNP, start a listening thread so we can * harvest connection requests as rapidly as possible */ From 49a2558eff9a14327398097543f1b47d59d7ccde Mon Sep 17 00:00:00 2001 From: Geoffrey Paulsen Date: Mon, 9 Sep 2019 14:48:52 
-0400 Subject: [PATCH 89/94] Reving VERSION to v4.0.2rc2 Reving VERSION to v4.0.2rc2 Signed-off-by: Geoffrey Paulsen --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 1b15d265b58..b4422111a08 100644 --- a/VERSION +++ b/VERSION @@ -30,7 +30,7 @@ release=2 # requirement is that it must be entirely printable ASCII characters # and have no white space. -greek=rc1 +greek=rc2 # If repo_rev is empty, then the repository version number will be # obtained during "make dist" via the "git describe --tags --always" From 93dbc16696a1292e3988e2a9d51146da1b5f8da6 Mon Sep 17 00:00:00 2001 From: Thomas Naughton Date: Tue, 2 Oct 2018 11:10:57 -0400 Subject: [PATCH 90/94] WIP: carry Tau memory_instrumentation bits to master See https://github.com/ParaToolsInc/ompi.git --- ompi/communicator/comm_init.c | 2 ++ ompi/group/group_init.c | 17 ++++++++++++++++- ompi/mca/pml/ob1/pml_ob1_comm.c | 4 ++++ ompi/mca/pml/yalla/pml_yalla.c | 4 ++++ ompi/win/win.c | 2 ++ opal/class/opal_object.c | 24 ++++++++++++++++++++++++ opal/class/opal_object.h | 11 +++++++++++ oshmem/proc/proc.c | 7 +++++++ 8 files changed, 70 insertions(+), 1 deletion(-) diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index 75aac4d49e3..9f9211cbb1a 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -242,6 +242,7 @@ ompi_communicator_t *ompi_comm_allocate ( int local_size, int remote_size ) /* create new communicator element */ new_comm = OBJ_NEW(ompi_communicator_t); + Tau_start_class_allocation(new_comm->c_base.obj_class->cls_name, 0, 0); new_comm->super.s_info = NULL; new_comm->c_local_group = ompi_group_allocate ( local_size ); if ( 0 < remote_size ) { @@ -258,6 +259,7 @@ ompi_communicator_t *ompi_comm_allocate ( int local_size, int remote_size ) /* fill in the inscribing hyper-cube dimensions */ new_comm->c_cube_dim = opal_cube_dim(local_size); + Tau_stop_class_allocation(new_comm->c_base.obj_class->cls_name, 0); 
return new_comm; } diff --git a/ompi/group/group_init.c b/ompi/group/group_init.c index 674e4749eda..dd10141e5f0 100644 --- a/ompi/group/group_init.c +++ b/ompi/group/group_init.c @@ -57,7 +57,11 @@ ompi_predefined_group_t *ompi_mpi_group_null_addr = &ompi_mpi_group_null; ompi_group_t *ompi_group_allocate(int group_size) { /* local variables */ + Tau_start_class_allocation("ompi_group_t", 0, 0); ompi_proc_t **procs = calloc (group_size, sizeof (ompi_proc_t *)); + Tau_start_class_allocation("ompi_proc_t **", group_size * sizeof(ompi_proc_t *), 0); + Tau_stop_class_allocation("ompi_proc_t **", 1); + Tau_stop_class_allocation("ompi_group_t", 0); ompi_group_t *new_group; if (NULL == procs) { @@ -81,6 +85,7 @@ ompi_group_t *ompi_group_allocate_plist_w_procs (ompi_proc_t **procs, int group_ /* create new group group element */ new_group = OBJ_NEW(ompi_group_t); + Tau_start_class_allocation(new_group->super.obj_class->cls_name, 0, 0); if (NULL == new_group) { return NULL; @@ -105,7 +110,7 @@ ompi_group_t *ompi_group_allocate_plist_w_procs (ompi_proc_t **procs, int group_ OMPI_GROUP_SET_DENSE(new_group); ompi_group_increment_proc_count (new_group); - + Tau_stop_class_allocation(new_group->super.obj_class->cls_name, 0); return new_group; } @@ -126,11 +131,14 @@ ompi_group_t *ompi_group_allocate_sporadic(int group_size) new_group = NULL; goto error_exit; } + Tau_start_class_allocation(new_group->super.obj_class->cls_name, 0, 0); /* allocate array of (grp_sporadic_list )'s */ if (0 < group_size) { new_group->sparse_data.grp_sporadic.grp_sporadic_list = (struct ompi_group_sporadic_list_t *)malloc (sizeof(struct ompi_group_sporadic_list_t ) * group_size); + Tau_start_class_allocation("ompi_group_sporadic_list_t *", sizeof(struct ompi_group_sporadic_list_t ) * group_size, 0); + Tau_stop_class_allocation("ompi_group_sporadic_list_t *", 1); /* non-empty group */ if ( NULL == new_group->sparse_data.grp_sporadic.grp_sporadic_list) { @@ -151,6 +159,7 @@ ompi_group_t 
*ompi_group_allocate_sporadic(int group_size) OMPI_GROUP_SET_SPORADIC(new_group); error_exit: + Tau_stop_class_allocation(new_group->super.obj_class->cls_name, 0); return new_group; } @@ -168,6 +177,7 @@ ompi_group_t *ompi_group_allocate_strided(void) new_group = NULL; goto error_exit; } + Tau_start_class_allocation(new_group->super.obj_class->cls_name, 0, 0); /* initialize our rank to MPI_UNDEFINED */ new_group->grp_my_rank = MPI_UNDEFINED; new_group->grp_proc_pointers = NULL; @@ -177,6 +187,7 @@ ompi_group_t *ompi_group_allocate_strided(void) new_group->sparse_data.grp_strided.grp_strided_last_element = -1; error_exit: /* return */ + Tau_stop_class_allocation(new_group->super.obj_class->cls_name, 0); return new_group; } ompi_group_t *ompi_group_allocate_bmap(int orig_group_size , int group_size) @@ -196,8 +207,11 @@ ompi_group_t *ompi_group_allocate_bmap(int orig_group_size , int group_size) goto error_exit; } /* allocate the unsigned char list */ + Tau_start_class_allocation(new_group->super.obj_class->cls_name, 0, 0); new_group->sparse_data.grp_bitmap.grp_bitmap_array = (unsigned char *)malloc (sizeof(unsigned char) * ompi_group_div_ceil(orig_group_size,BSIZE)); + Tau_start_class_allocation("unsigned char *", sizeof(struct ompi_group_sporadic_list_t ) * group_size, 0); + Tau_stop_class_allocation("unsigned char *", 1); new_group->sparse_data.grp_bitmap.grp_bitmap_array_len = ompi_group_div_ceil(orig_group_size,BSIZE); @@ -211,6 +225,7 @@ ompi_group_t *ompi_group_allocate_bmap(int orig_group_size , int group_size) error_exit: /* return */ + Tau_stop_class_allocation(new_group->super.obj_class->cls_name, 0); return new_group; } diff --git a/ompi/mca/pml/ob1/pml_ob1_comm.c b/ompi/mca/pml/ob1/pml_ob1_comm.c index 510704849da..824cbfe5138 100644 --- a/ompi/mca/pml/ob1/pml_ob1_comm.c +++ b/ompi/mca/pml/ob1/pml_ob1_comm.c @@ -91,7 +91,11 @@ OBJ_CLASS_INSTANCE( int mca_pml_ob1_comm_init_size (mca_pml_ob1_comm_t* comm, size_t size) { /* send message sequence-number 
support - sender side */ + Tau_start_class_allocation(comm->super.obj_class->cls_name, 0, 0); comm->procs = (mca_pml_ob1_comm_proc_t **) calloc(size, sizeof (mca_pml_ob1_comm_proc_t *)); + Tau_start_class_allocation("mca_pml_ob1_comm_proc_t", size * sizeof(mca_pml_ob1_comm_proc_t *), 0); + Tau_stop_class_allocation("mca_pml_ob1_comm_proc_t", 1); + Tau_stop_class_allocation(comm->super.obj_class->cls_name, 0); if(NULL == comm->procs) { return OMPI_ERR_OUT_OF_RESOURCE; } diff --git a/ompi/mca/pml/yalla/pml_yalla.c b/ompi/mca/pml/yalla/pml_yalla.c index 03bb65d420d..c0724c0810b 100644 --- a/ompi/mca/pml/yalla/pml_yalla.c +++ b/ompi/mca/pml/yalla/pml_yalla.c @@ -256,7 +256,11 @@ int mca_pml_yalla_add_procs(struct ompi_proc_t **procs, size_t nprocs) return OMPI_ERROR; } + Tau_start_class_allocation(procs[i]->super.super.super.obj_class->cls_name, 0, 0); + Tau_start_class_allocation("mxm_conn_h", sizeof(mxm_conn_h), 0); procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML] = conn; + Tau_stop_class_allocation("mxm_conn_h", 1); + Tau_stop_class_allocation(procs[i]->super.super.super.obj_class->cls_name, 0); } return OMPI_SUCCESS; diff --git a/ompi/win/win.c b/ompi/win/win.c index bd388f967ec..465badb9b87 100644 --- a/ompi/win/win.c +++ b/ompi/win/win.c @@ -147,6 +147,7 @@ static int alloc_window(struct ompi_communicator_t *comm, opal_info_t *info, int /* create the object */ win = OBJ_NEW(ompi_win_t); + Tau_start_class_allocation(win->w_base.obj_class->cls_name, 0, 0); if (NULL == win) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -186,6 +187,7 @@ static int alloc_window(struct ompi_communicator_t *comm, opal_info_t *info, int } *win_out = win; + Tau_stop_class_allocation(win->w_base.obj_class->cls_name, 0); return OMPI_SUCCESS; } diff --git a/opal/class/opal_object.c b/opal/class/opal_object.c index cd09d647f66..7547caf4c5d 100644 --- a/opal/class/opal_object.c +++ b/opal/class/opal_object.c @@ -33,6 +33,30 @@ #include "opal/class/opal_object.h" #include "opal/constants.h" +void 
__attribute__((weak)) Tau_track_class_allocation(const char * name, size_t size) { + +} + +void __attribute__((weak)) Tau_track_class_deallocation(const char * name, size_t size) { + +} + +void __attribute__((weak)) Tau_start_class_allocation(const char * name, size_t size, int include_in_parent) { + +} + +void __attribute__((weak)) Tau_stop_class_allocation(const char * name, int record) { + +} + +void __attribute__((weak)) Tau_start_class_deallocation(const char * name, size_t size, int include_in_parent) { + +} + +void __attribute__((weak)) Tau_stop_class_deallocation(const char * name, int record) { + +} + /* * Instantiation of class descriptor for the base class. This is * special, since be mark it as already initialized, with no parent diff --git a/opal/class/opal_object.h b/opal/class/opal_object.h index 4e2da95c204..18a76619ee5 100644 --- a/opal/class/opal_object.h +++ b/opal/class/opal_object.h @@ -132,6 +132,13 @@ BEGIN_C_DECLS #define OPAL_OBJ_MAGIC_ID ((0xdeafbeedULL << 32) + 0xdeafbeedULL) #endif +void __attribute__((weak)) Tau_track_class_allocation(const char * name, size_t size); +void __attribute__((weak)) Tau_track_class_deallocation(const char * name, size_t size); +void __attribute__((weak)) Tau_start_class_allocation(const char * name, size_t size, int include_in_parent); +void __attribute__((weak)) Tau_stop_class_allocation(const char * name, int record); +void __attribute__((weak)) Tau_start_class_deallocation(const char * name, size_t size, int include_in_parent); +void __attribute__((weak)) Tau_stop_class_deallocation(const char * name, int record); + /* typedefs ***********************************************************/ typedef struct opal_object_t opal_object_t; @@ -457,6 +464,7 @@ static inline void opal_obj_run_destructors(opal_object_t * object) assert(NULL != object->obj_class); + Tau_track_class_deallocation(object->obj_class->cls_name, object->obj_class->cls_sizeof); cls_destruct = object->obj_class->cls_destruct_array; while( 
NULL != *cls_destruct ) { (*cls_destruct)(object); @@ -480,6 +488,8 @@ static inline opal_object_t *opal_obj_new(opal_class_t * cls) opal_object_t *object; assert(cls->cls_sizeof >= sizeof(opal_object_t)); + Tau_start_class_allocation(cls->cls_name, cls->cls_sizeof, 0); + #if OPAL_WANT_MEMCHECKER object = (opal_object_t *) calloc(1, cls->cls_sizeof); #else @@ -493,6 +503,7 @@ static inline opal_object_t *opal_obj_new(opal_class_t * cls) object->obj_reference_count = 1; opal_obj_run_constructors(object); } + Tau_stop_class_allocation(cls->cls_name, 1); return object; } diff --git a/oshmem/proc/proc.c b/oshmem/proc/proc.c index 8aa67726f9b..527cf0b22f5 100644 --- a/oshmem/proc/proc.c +++ b/oshmem/proc/proc.c @@ -168,6 +168,8 @@ oshmem_group_t* oshmem_proc_group_create(int pe_start, int pe_stride, int pe_siz } group = OBJ_NEW(oshmem_group_t); + Tau_start_class_allocation(group->base.obj_class->cls_name, 0, 0); + if (NULL == group) { return NULL; } @@ -179,7 +181,10 @@ oshmem_group_t* oshmem_proc_group_create(int pe_start, int pe_stride, int pe_siz /* allocate an array */ proc_array = (ompi_proc_t**) malloc(pe_size * sizeof(ompi_proc_t*)); + Tau_start_class_allocation("ompi_proc_t **", pe_size * sizeof(ompi_proc_t*), 0); + Tau_stop_class_allocation("ompi_proc_t **", 1); if (NULL == proc_array) { + Tau_stop_class_allocation(group->base.obj_class->cls_name, 0); OBJ_RELEASE(group); OPAL_THREAD_UNLOCK(&oshmem_proc_lock); return NULL ; @@ -231,6 +236,7 @@ oshmem_group_t* oshmem_proc_group_create(int pe_start, int pe_stride, int pe_siz if (OSHMEM_SUCCESS != mca_scoll_base_select(group)) { opal_output(0, "Error: No collective modules are available: group is not created, returning NULL"); + Tau_stop_class_allocation(group->base.obj_class->cls_name, 0); oshmem_proc_group_destroy_internal(group, 0); OPAL_THREAD_UNLOCK(&oshmem_proc_lock); return NULL; @@ -244,6 +250,7 @@ oshmem_group_t* oshmem_proc_group_create(int pe_start, int pe_stride, int pe_siz } 
OPAL_THREAD_UNLOCK(&oshmem_proc_lock); + Tau_stop_class_allocation(group->base.obj_class->cls_name, 0); return group; } From 6d0adca540110c66bdea3ebad32a16f169b23efd Mon Sep 17 00:00:00 2001 From: Thomas Naughton Date: Fri, 4 Jan 2019 17:50:49 -0500 Subject: [PATCH 91/94] update to new opal_infosubscriber_t structure There was a restructuring of the comm/win/group objects that shifted the fields down for getting at the class name. This appears to relate to opal_infosubscriber_t changes from 50aa143. NOTE: The opal_infosubscriber_t changes were not applied to the oshmem_group_t structure, so no need for changes there. --- ompi/communicator/comm_init.c | 4 ++-- ompi/win/win.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index 9f9211cbb1a..70152e21476 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -242,7 +242,7 @@ ompi_communicator_t *ompi_comm_allocate ( int local_size, int remote_size ) /* create new communicator element */ new_comm = OBJ_NEW(ompi_communicator_t); - Tau_start_class_allocation(new_comm->c_base.obj_class->cls_name, 0, 0); + Tau_start_class_allocation(new_comm->super.s_base.obj_class->cls_name, 0, 0); new_comm->super.s_info = NULL; new_comm->c_local_group = ompi_group_allocate ( local_size ); if ( 0 < remote_size ) { @@ -259,7 +259,7 @@ ompi_communicator_t *ompi_comm_allocate ( int local_size, int remote_size ) /* fill in the inscribing hyper-cube dimensions */ new_comm->c_cube_dim = opal_cube_dim(local_size); - Tau_stop_class_allocation(new_comm->c_base.obj_class->cls_name, 0); + Tau_stop_class_allocation(new_comm->super.s_base.obj_class->cls_name, 0); return new_comm; } diff --git a/ompi/win/win.c b/ompi/win/win.c index 465badb9b87..352f0aa186d 100644 --- a/ompi/win/win.c +++ b/ompi/win/win.c @@ -147,7 +147,7 @@ static int alloc_window(struct ompi_communicator_t *comm, opal_info_t *info, int /* create the object */ win = 
OBJ_NEW(ompi_win_t); - Tau_start_class_allocation(win->w_base.obj_class->cls_name, 0, 0); + Tau_start_class_allocation(win->super.s_base.obj_class->cls_name, 0, 0); if (NULL == win) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -187,7 +187,7 @@ static int alloc_window(struct ompi_communicator_t *comm, opal_info_t *info, int } *win_out = win; - Tau_stop_class_allocation(win->w_base.obj_class->cls_name, 0); + Tau_stop_class_allocation(win->super.s_base.obj_class->cls_name, 0); return OMPI_SUCCESS; } From 332530845ad8669a7ae5c017c2526630f5664dcb Mon Sep 17 00:00:00 2001 From: Thomas Naughton Date: Tue, 15 Jan 2019 13:55:42 -0500 Subject: [PATCH 92/94] move profile funcs/stubs out to memprof file Guard these using existing OPAL_ENABLE_MEM_PROFILE option that is enabled via '--enable-mem-profile' configury. --- config/opal_configure_options.m4 | 1 + opal/class/opal_object.c | 24 +---------------- opal/class/opal_object.h | 7 +---- opal/util/Makefile.am | 7 +++++ opal/util/memprof.c | 45 ++++++++++++++++++++++++++++++++ opal/util/memprof.h | 39 +++++++++++++++++++++++++++ 6 files changed, 94 insertions(+), 29 deletions(-) create mode 100644 opal/util/memprof.c create mode 100644 opal/util/memprof.h diff --git a/config/opal_configure_options.m4 b/config/opal_configure_options.m4 index 43fcaf3469d..f02a6d0eb4f 100644 --- a/config/opal_configure_options.m4 +++ b/config/opal_configure_options.m4 @@ -120,6 +120,7 @@ else fi AC_DEFINE_UNQUOTED(OPAL_ENABLE_MEM_PROFILE, $WANT_MEM_PROFILE, [Whether we want the memory profiling or not]) +AM_CONDITIONAL([OPAL_ENABLE_MEM_PROFILE], [test "$WANT_MEM_PROFILE" = "1"]) # # Developer picky compiler options diff --git a/opal/class/opal_object.c b/opal/class/opal_object.c index 7547caf4c5d..4dc09318b2a 100644 --- a/opal/class/opal_object.c +++ b/opal/class/opal_object.c @@ -32,30 +32,8 @@ #include "opal/sys/atomic.h" #include "opal/class/opal_object.h" #include "opal/constants.h" +#include "opal/util/memprof.h" -void __attribute__((weak)) 
Tau_track_class_allocation(const char * name, size_t size) { - -} - -void __attribute__((weak)) Tau_track_class_deallocation(const char * name, size_t size) { - -} - -void __attribute__((weak)) Tau_start_class_allocation(const char * name, size_t size, int include_in_parent) { - -} - -void __attribute__((weak)) Tau_stop_class_allocation(const char * name, int record) { - -} - -void __attribute__((weak)) Tau_start_class_deallocation(const char * name, size_t size, int include_in_parent) { - -} - -void __attribute__((weak)) Tau_stop_class_deallocation(const char * name, int record) { - -} /* * Instantiation of class descriptor for the base class. This is diff --git a/opal/class/opal_object.h b/opal/class/opal_object.h index 18a76619ee5..15e85c251d0 100644 --- a/opal/class/opal_object.h +++ b/opal/class/opal_object.h @@ -124,6 +124,7 @@ #include #include "opal/threads/thread_usage.h" +#include "opal/util/memprof.h" BEGIN_C_DECLS @@ -132,12 +133,6 @@ BEGIN_C_DECLS #define OPAL_OBJ_MAGIC_ID ((0xdeafbeedULL << 32) + 0xdeafbeedULL) #endif -void __attribute__((weak)) Tau_track_class_allocation(const char * name, size_t size); -void __attribute__((weak)) Tau_track_class_deallocation(const char * name, size_t size); -void __attribute__((weak)) Tau_start_class_allocation(const char * name, size_t size, int include_in_parent); -void __attribute__((weak)) Tau_stop_class_allocation(const char * name, int record); -void __attribute__((weak)) Tau_start_class_deallocation(const char * name, size_t size, int include_in_parent); -void __attribute__((weak)) Tau_stop_class_deallocation(const char * name, int record); /* typedefs ***********************************************************/ diff --git a/opal/util/Makefile.am b/opal/util/Makefile.am index 76bef717b68..9d5d3abfd35 100644 --- a/opal/util/Makefile.am +++ b/opal/util/Makefile.am @@ -17,6 +17,8 @@ # Copyright (c) 2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. 
# Copyright (c) 2016-2017 IBM Corporation. All rights reserved. +# Copyright (c) 2019 UT-Battelle, LLC. All rights reserved. +# # $COPYRIGHT$ # # Additional copyrights may follow @@ -55,6 +57,7 @@ headers = \ if.h \ keyval_parse.h \ malloc.h \ + memprof.h \ net.h \ numtostr.h \ opal_environ.h \ @@ -119,6 +122,10 @@ if OPAL_COMPILE_TIMING libopalutil_la_SOURCES += timings.c endif +if OPAL_ENABLE_MEM_PROFILE +libopalutil_la_SOURCES += memprof.c +endif + libopalutil_la_LIBADD = \ keyval/libopalutilkeyval.la libopalutil_la_DEPENDENCIES = \ diff --git a/opal/util/memprof.c b/opal/util/memprof.c new file mode 100644 index 00000000000..8afbff0f574 --- /dev/null +++ b/opal/util/memprof.c @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2019 UT-Battelle, LLC. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/util/memprof.h" + + +void __attribute__((weak)) Tau_track_class_allocation(const char * name, + size_t size) { + +} + +void __attribute__((weak)) Tau_track_class_deallocation(const char * name, + size_t size) { + +} + +void __attribute__((weak)) Tau_start_class_allocation(const char * name, + size_t size, + int include_in_parent) { + +} + +void __attribute__((weak)) Tau_stop_class_allocation(const char * name, + int record) { + +} + +void __attribute__((weak)) Tau_start_class_deallocation(const char * name, + size_t size, + int include_in_parent) { + +} + +void __attribute__((weak)) Tau_stop_class_deallocation(const char * name, + int record) { + +} diff --git a/opal/util/memprof.h b/opal/util/memprof.h new file mode 100644 index 00000000000..ddc8fafa46e --- /dev/null +++ b/opal/util/memprof.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2018 UT-Battelle, LLC. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_MEMPROF_H +#define OPAL_MEMPROF_H + +BEGIN_C_DECLS + +#if OPAL_ENABLE_MEM_PROFILE + + void __attribute__((weak)) Tau_track_class_allocation(const char * name, size_t size); + void __attribute__((weak)) Tau_track_class_deallocation(const char * name, size_t size); + void __attribute__((weak)) Tau_start_class_allocation(const char * name, size_t size, int include_in_parent); + void __attribute__((weak)) Tau_stop_class_allocation(const char * name, int record); + void __attribute__((weak)) Tau_start_class_deallocation(const char * name, size_t size, int include_in_parent); + void __attribute__((weak)) Tau_stop_class_deallocation(const char * name, int record); + +#endif /* OPAL_ENABLE_MEM_PROFILE */ + +END_C_DECLS + +#endif /* OPAL_MEMPROF_H */ From a6eeecfee68406bb82d7bff54a91c113ac6240ae Mon Sep 17 00:00:00 2001 From: Thomas Naughton Date: Tue, 15 Jan 2019 21:30:56 -0500 Subject: [PATCH 93/94] use macros in code and add comments + fix include --- ompi/communicator/comm_init.c | 6 ++- ompi/group/group_init.c | 34 ++++++++-------- ompi/mca/pml/ob1/pml_ob1_comm.c | 9 +++-- ompi/mca/pml/yalla/pml_yalla.c | 9 +++-- ompi/win/win.c | 6 ++- opal/class/opal_object.h | 6 +-- opal/util/memprof.c | 2 +- opal/util/memprof.h | 71 ++++++++++++++++++++++++++++----- oshmem/proc/proc.c | 13 +++--- 9 files changed, 108 insertions(+), 48 deletions(-) diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index 70152e21476..9c49270fa75 100644 --- 
a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -35,6 +35,8 @@ #include "opal/util/bit_ops.h" #include "opal/util/info_subscriber.h" +#include "opal/util/string_copy.h" +#include "opal/util/memprof.h" #include "opal/mca/pmix/pmix.h" #include "ompi/constants.h" #include "ompi/mca/pml/pml.h" @@ -242,7 +244,7 @@ ompi_communicator_t *ompi_comm_allocate ( int local_size, int remote_size ) /* create new communicator element */ new_comm = OBJ_NEW(ompi_communicator_t); - Tau_start_class_allocation(new_comm->super.s_base.obj_class->cls_name, 0, 0); + OPAL_MEMPROF_START_ALLOC(new_comm->super.s_base.obj_class->cls_name, 0, 0); new_comm->super.s_info = NULL; new_comm->c_local_group = ompi_group_allocate ( local_size ); if ( 0 < remote_size ) { @@ -259,7 +261,7 @@ ompi_communicator_t *ompi_comm_allocate ( int local_size, int remote_size ) /* fill in the inscribing hyper-cube dimensions */ new_comm->c_cube_dim = opal_cube_dim(local_size); - Tau_stop_class_allocation(new_comm->super.s_base.obj_class->cls_name, 0); + OPAL_MEMPROF_STOP_ALLOC(new_comm->super.s_base.obj_class->cls_name, 0); return new_comm; } diff --git a/ompi/group/group_init.c b/ompi/group/group_init.c index dd10141e5f0..7b4175f9829 100644 --- a/ompi/group/group_init.c +++ b/ompi/group/group_init.c @@ -28,6 +28,8 @@ #include "ompi/constants.h" #include "mpi.h" +#include "opal/util/memprof.h" + /* define class information */ static void ompi_group_construct(ompi_group_t *); static void ompi_group_destruct(ompi_group_t *); @@ -57,11 +59,11 @@ ompi_predefined_group_t *ompi_mpi_group_null_addr = &ompi_mpi_group_null; ompi_group_t *ompi_group_allocate(int group_size) { /* local variables */ - Tau_start_class_allocation("ompi_group_t", 0, 0); + OPAL_MEMPROF_START_ALLOC("ompi_group_t", 0, 0); ompi_proc_t **procs = calloc (group_size, sizeof (ompi_proc_t *)); - Tau_start_class_allocation("ompi_proc_t **", group_size * sizeof(ompi_proc_t *), 0); - Tau_stop_class_allocation("ompi_proc_t **", 1); - 
Tau_stop_class_allocation("ompi_group_t", 0); + OPAL_MEMPROF_START_ALLOC("ompi_proc_t **", group_size * sizeof(ompi_proc_t *), 0); + OPAL_MEMPROF_STOP_ALLOC("ompi_proc_t **", 1); + OPAL_MEMPROF_STOP_ALLOC("ompi_group_t", 0); ompi_group_t *new_group; if (NULL == procs) { @@ -85,7 +87,7 @@ ompi_group_t *ompi_group_allocate_plist_w_procs (ompi_proc_t **procs, int group_ /* create new group group element */ new_group = OBJ_NEW(ompi_group_t); - Tau_start_class_allocation(new_group->super.obj_class->cls_name, 0, 0); + OPAL_MEMPROF_START_ALLOC(new_group->super.obj_class->cls_name, 0, 0); if (NULL == new_group) { return NULL; @@ -110,7 +112,7 @@ ompi_group_t *ompi_group_allocate_plist_w_procs (ompi_proc_t **procs, int group_ OMPI_GROUP_SET_DENSE(new_group); ompi_group_increment_proc_count (new_group); - Tau_stop_class_allocation(new_group->super.obj_class->cls_name, 0); + OPAL_MEMPROF_STOP_ALLOC(new_group->super.obj_class->cls_name, 0); return new_group; } @@ -131,14 +133,14 @@ ompi_group_t *ompi_group_allocate_sporadic(int group_size) new_group = NULL; goto error_exit; } - Tau_start_class_allocation(new_group->super.obj_class->cls_name, 0, 0); + OPAL_MEMPROF_START_ALLOC(new_group->super.obj_class->cls_name, 0, 0); /* allocate array of (grp_sporadic_list )'s */ if (0 < group_size) { new_group->sparse_data.grp_sporadic.grp_sporadic_list = (struct ompi_group_sporadic_list_t *)malloc (sizeof(struct ompi_group_sporadic_list_t ) * group_size); - Tau_start_class_allocation("ompi_group_sporadic_list_t *", sizeof(struct ompi_group_sporadic_list_t ) * group_size, 0); - Tau_stop_class_allocation("ompi_group_sporadic_list_t *", 1); + OPAL_MEMPROF_START_ALLOC("ompi_group_sporadic_list_t *", sizeof(struct ompi_group_sporadic_list_t ) * group_size, 0); + OPAL_MEMPROF_STOP_ALLOC("ompi_group_sporadic_list_t *", 1); /* non-empty group */ if ( NULL == new_group->sparse_data.grp_sporadic.grp_sporadic_list) { @@ -159,7 +161,7 @@ ompi_group_t *ompi_group_allocate_sporadic(int group_size) 
OMPI_GROUP_SET_SPORADIC(new_group); error_exit: - Tau_stop_class_allocation(new_group->super.obj_class->cls_name, 0); + OPAL_MEMPROF_STOP_ALLOC(new_group->super.obj_class->cls_name, 0); return new_group; } @@ -177,7 +179,7 @@ ompi_group_t *ompi_group_allocate_strided(void) new_group = NULL; goto error_exit; } - Tau_start_class_allocation(new_group->super.obj_class->cls_name, 0, 0); + OPAL_MEMPROF_START_ALLOC(new_group->super.obj_class->cls_name, 0, 0); /* initialize our rank to MPI_UNDEFINED */ new_group->grp_my_rank = MPI_UNDEFINED; new_group->grp_proc_pointers = NULL; @@ -187,7 +189,7 @@ ompi_group_t *ompi_group_allocate_strided(void) new_group->sparse_data.grp_strided.grp_strided_last_element = -1; error_exit: /* return */ - Tau_stop_class_allocation(new_group->super.obj_class->cls_name, 0); + OPAL_MEMPROF_STOP_ALLOC(new_group->super.obj_class->cls_name, 0); return new_group; } ompi_group_t *ompi_group_allocate_bmap(int orig_group_size , int group_size) @@ -207,11 +209,11 @@ ompi_group_t *ompi_group_allocate_bmap(int orig_group_size , int group_size) goto error_exit; } /* allocate the unsigned char list */ - Tau_start_class_allocation(new_group->super.obj_class->cls_name, 0, 0); + OPAL_MEMPROF_START_ALLOC(new_group->super.obj_class->cls_name, 0, 0); new_group->sparse_data.grp_bitmap.grp_bitmap_array = (unsigned char *)malloc (sizeof(unsigned char) * ompi_group_div_ceil(orig_group_size,BSIZE)); - Tau_start_class_allocation("unsigned char *", sizeof(struct ompi_group_sporadic_list_t ) * group_size, 0); - Tau_stop_class_allocation("unsigned char *", 1); + OPAL_MEMPROF_START_ALLOC("unsigned char *", sizeof(struct ompi_group_sporadic_list_t ) * group_size, 0); + OPAL_MEMPROF_STOP_ALLOC("unsigned char *", 1); new_group->sparse_data.grp_bitmap.grp_bitmap_array_len = ompi_group_div_ceil(orig_group_size,BSIZE); @@ -225,7 +227,7 @@ ompi_group_t *ompi_group_allocate_bmap(int orig_group_size , int group_size) error_exit: /* return */ - 
Tau_stop_class_allocation(new_group->super.obj_class->cls_name, 0); + OPAL_MEMPROF_STOP_ALLOC(new_group->super.obj_class->cls_name, 0); return new_group; } diff --git a/ompi/mca/pml/ob1/pml_ob1_comm.c b/ompi/mca/pml/ob1/pml_ob1_comm.c index 824cbfe5138..19b3b6b5161 100644 --- a/ompi/mca/pml/ob1/pml_ob1_comm.c +++ b/ompi/mca/pml/ob1/pml_ob1_comm.c @@ -17,6 +17,7 @@ */ #include "ompi_config.h" +#include "opal/util/memprof.h" #include #include "pml_ob1.h" @@ -91,11 +92,11 @@ OBJ_CLASS_INSTANCE( int mca_pml_ob1_comm_init_size (mca_pml_ob1_comm_t* comm, size_t size) { /* send message sequence-number support - sender side */ - Tau_start_class_allocation(comm->super.obj_class->cls_name, 0, 0); + OPAL_MEMPROF_START_ALLOC(comm->super.obj_class->cls_name, 0, 0); comm->procs = (mca_pml_ob1_comm_proc_t **) calloc(size, sizeof (mca_pml_ob1_comm_proc_t *)); - Tau_start_class_allocation("mca_pml_ob1_comm_proc_t", size * sizeof(mca_pml_ob1_comm_proc_t *), 0); - Tau_stop_class_allocation("mca_pml_ob1_comm_proc_t", 1); - Tau_stop_class_allocation(comm->super.obj_class->cls_name, 0); + OPAL_MEMPROF_START_ALLOC("mca_pml_ob1_comm_proc_t", size * sizeof(mca_pml_ob1_comm_proc_t *), 0); + OPAL_MEMPROF_STOP_ALLOC("mca_pml_ob1_comm_proc_t", 1); + OPAL_MEMPROF_STOP_ALLOC(comm->super.obj_class->cls_name, 0); if(NULL == comm->procs) { return OMPI_ERR_OUT_OF_RESOURCE; } diff --git a/ompi/mca/pml/yalla/pml_yalla.c b/ompi/mca/pml/yalla/pml_yalla.c index c0724c0810b..46cb125ffae 100644 --- a/ompi/mca/pml/yalla/pml_yalla.c +++ b/ompi/mca/pml/yalla/pml_yalla.c @@ -19,6 +19,7 @@ #include "opal/runtime/opal.h" #include "opal/memoryhooks/memory.h" +#include "opal/util/memprof.h" #include "opal/mca/memory/base/base.h" #include "opal/mca/pmix/pmix.h" #include "ompi/mca/pml/base/pml_base_bsend.h" @@ -256,11 +257,11 @@ int mca_pml_yalla_add_procs(struct ompi_proc_t **procs, size_t nprocs) return OMPI_ERROR; } - Tau_start_class_allocation(procs[i]->super.super.super.obj_class->cls_name, 0, 0); - 
Tau_start_class_allocation("mxm_conn_h", sizeof(mxm_conn_h), 0); + OPAL_MEMPROF_START_ALLOC(procs[i]->super.super.super.obj_class->cls_name, 0, 0); + OPAL_MEMPROF_START_ALLOC("mxm_conn_h", sizeof(mxm_conn_h), 0); procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PML] = conn; - Tau_stop_class_allocation("mxm_conn_h", 1); - Tau_stop_class_allocation(procs[i]->super.super.super.obj_class->cls_name, 0); + OPAL_MEMPROF_STOP_ALLOC("mxm_conn_h", 1); + OPAL_MEMPROF_STOP_ALLOC(procs[i]->super.super.super.obj_class->cls_name, 0); } return OMPI_SUCCESS; diff --git a/ompi/win/win.c b/ompi/win/win.c index 352f0aa186d..3eb0207ee93 100644 --- a/ompi/win/win.c +++ b/ompi/win/win.c @@ -27,6 +27,8 @@ #include "ompi_config.h" #include "opal/util/info_subscriber.h" +#include "opal/util/string_copy.h" +#include "opal/util/memprof.h" #include "mpi.h" #include "ompi/win/win.h" @@ -147,7 +149,7 @@ static int alloc_window(struct ompi_communicator_t *comm, opal_info_t *info, int /* create the object */ win = OBJ_NEW(ompi_win_t); - Tau_start_class_allocation(win->super.s_base.obj_class->cls_name, 0, 0); + OPAL_MEMPROF_START_ALLOC(win->super.s_base.obj_class->cls_name, 0, 0); if (NULL == win) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -187,7 +189,7 @@ static int alloc_window(struct ompi_communicator_t *comm, opal_info_t *info, int } *win_out = win; - Tau_stop_class_allocation(win->super.s_base.obj_class->cls_name, 0); + OPAL_MEMPROF_STOP_ALLOC(win->super.s_base.obj_class->cls_name, 0); return OMPI_SUCCESS; } diff --git a/opal/class/opal_object.h b/opal/class/opal_object.h index 15e85c251d0..49b13fd9489 100644 --- a/opal/class/opal_object.h +++ b/opal/class/opal_object.h @@ -459,7 +459,7 @@ static inline void opal_obj_run_destructors(opal_object_t * object) assert(NULL != object->obj_class); - Tau_track_class_deallocation(object->obj_class->cls_name, object->obj_class->cls_sizeof); + OPAL_MEMPROF_TRACK_DEALLOC(object->obj_class->cls_name, object->obj_class->cls_sizeof); cls_destruct = 
object->obj_class->cls_destruct_array; while( NULL != *cls_destruct ) { (*cls_destruct)(object); @@ -483,7 +483,7 @@ static inline opal_object_t *opal_obj_new(opal_class_t * cls) opal_object_t *object; assert(cls->cls_sizeof >= sizeof(opal_object_t)); - Tau_start_class_allocation(cls->cls_name, cls->cls_sizeof, 0); + OPAL_MEMPROF_START_ALLOC(cls->cls_name, cls->cls_sizeof, 0); #if OPAL_WANT_MEMCHECKER object = (opal_object_t *) calloc(1, cls->cls_sizeof); @@ -498,7 +498,7 @@ static inline opal_object_t *opal_obj_new(opal_class_t * cls) object->obj_reference_count = 1; opal_obj_run_constructors(object); } - Tau_stop_class_allocation(cls->cls_name, 1); + OPAL_MEMPROF_STOP_ALLOC(cls->cls_name, 1); return object; } diff --git a/opal/util/memprof.c b/opal/util/memprof.c index 8afbff0f574..656e262a8fd 100644 --- a/opal/util/memprof.c +++ b/opal/util/memprof.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2018 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2019 UT-Battelle, LLC. All rights reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/util/memprof.h b/opal/util/memprof.h index ddc8fafa46e..dda91e29a89 100644 --- a/opal/util/memprof.h +++ b/opal/util/memprof.h @@ -1,14 +1,4 @@ /* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. * Copyright (c) 2018 UT-Battelle, LLC. All rights reserved. * * $COPYRIGHT$ @@ -25,6 +15,17 @@ BEGIN_C_DECLS #if OPAL_ENABLE_MEM_PROFILE + /* + * NOTE: Must configure OMPI w/ '--enable-mem-profile', + * otherwise the macros are no-ops. 
+ * Tested with TAU v1.27.1 and v1.28 and PDT 3.25 + * for gathering memory profiling data about OMPI. + * + * Example usage: + * orterun -np 2 tau_exec -T mpi,pdt ring_c + * tau_mem_summarize.py . > ring-np2.csv + */ + void __attribute__((weak)) Tau_track_class_allocation(const char * name, size_t size); void __attribute__((weak)) Tau_track_class_deallocation(const char * name, size_t size); void __attribute__((weak)) Tau_start_class_allocation(const char * name, size_t size, int include_in_parent); @@ -32,6 +33,56 @@ BEGIN_C_DECLS void __attribute__((weak)) Tau_start_class_deallocation(const char * name, size_t size, int include_in_parent); void __attribute__((weak)) Tau_stop_class_deallocation(const char * name, int record); + #define OPAL_MEMPROF_TRACK_ALLOC(name, size) \ + do { \ + Tau_track_class_allocation(name, size); \ + } while(0) + + #define OPAL_MEMPROF_TRACK_DEALLOC(name, size) \ + do { \ + Tau_track_class_deallocation(name, size); \ + } while(0) + + #define OPAL_MEMPROF_START_ALLOC(name, size, include_in_parent) \ + do { \ + Tau_start_class_allocation(name, size, include_in_parent); \ + } while(0) + + #define OPAL_MEMPROF_STOP_ALLOC(name, record) \ + do { \ + Tau_stop_class_allocation(name, record); \ + } while(0) + + #define OPAL_MEMPROF_START_DEALLOC(name, size, include_in_parent) \ + do { \ + Tau_start_class_deallocation(name, size, include_in_parent); \ + } while(0) + + #define OPAL_MEMPROF_STOP_DEALLOC(name, record) \ + do { \ + Tau_stop_class_deallocation(name, record); \ + } while(0) + +#else + + #define OPAL_MEMPROF_TRACK_ALLOC(name, size) \ + do { } while(0) + + #define OPAL_MEMPROF_TRACK_DEALLOC(name, size) \ + do { } while(0) + + #define OPAL_MEMPROF_START_ALLOC(name, size, include_in_parent) \ + do { } while(0) + + #define OPAL_MEMPROF_STOP_ALLOC(name, record) \ + do { } while(0) + + #define OPAL_MEMPROF_START_DEALLOC(name, size, include_in_parent) \ + do { } while(0) + + #define OPAL_MEMPROF_STOP_DEALLOC(name, record) \ + do { } while(0) 
+ #endif /* OPAL_ENABLE_MEM_PROFILE */ END_C_DECLS diff --git a/oshmem/proc/proc.c b/oshmem/proc/proc.c index 527cf0b22f5..27eb4cedbed 100644 --- a/oshmem/proc/proc.c +++ b/oshmem/proc/proc.c @@ -35,6 +35,7 @@ #include "opal/dss/dss.h" #include "opal/util/arch.h" #include "opal/class/opal_list.h" +#include "opal/util/memprof.h" static opal_mutex_t oshmem_proc_lock; @@ -168,7 +169,7 @@ oshmem_group_t* oshmem_proc_group_create(int pe_start, int pe_stride, int pe_siz } group = OBJ_NEW(oshmem_group_t); - Tau_start_class_allocation(group->base.obj_class->cls_name, 0, 0); + OPAL_MEMPROF_START_ALLOC(group->base.obj_class->cls_name, 0, 0); if (NULL == group) { return NULL; @@ -181,10 +182,10 @@ oshmem_group_t* oshmem_proc_group_create(int pe_start, int pe_stride, int pe_siz /* allocate an array */ proc_array = (ompi_proc_t**) malloc(pe_size * sizeof(ompi_proc_t*)); - Tau_start_class_allocation("ompi_proc_t **", pe_size * sizeof(ompi_proc_t*), 0); - Tau_stop_class_allocation("ompi_proc_t **", 1); + OPAL_MEMPROF_START_ALLOC("ompi_proc_t **", pe_size * sizeof(ompi_proc_t*), 0); + OPAL_MEMPROF_STOP_ALLOC("ompi_proc_t **", 1); if (NULL == proc_array) { - Tau_stop_class_allocation(group->base.obj_class->cls_name, 0); + OPAL_MEMPROF_STOP_ALLOC(group->base.obj_class->cls_name, 0); OBJ_RELEASE(group); OPAL_THREAD_UNLOCK(&oshmem_proc_lock); return NULL ; @@ -236,7 +237,7 @@ oshmem_group_t* oshmem_proc_group_create(int pe_start, int pe_stride, int pe_siz if (OSHMEM_SUCCESS != mca_scoll_base_select(group)) { opal_output(0, "Error: No collective modules are available: group is not created, returning NULL"); - Tau_stop_class_allocation(group->base.obj_class->cls_name, 0); + OPAL_MEMPROF_STOP_ALLOC(group->base.obj_class->cls_name, 0); oshmem_proc_group_destroy_internal(group, 0); OPAL_THREAD_UNLOCK(&oshmem_proc_lock); return NULL; @@ -250,7 +251,7 @@ oshmem_group_t* oshmem_proc_group_create(int pe_start, int pe_stride, int pe_siz } OPAL_THREAD_UNLOCK(&oshmem_proc_lock); - 
Tau_stop_class_allocation(group->base.obj_class->cls_name, 0); + OPAL_MEMPROF_STOP_ALLOC(group->base.obj_class->cls_name, 0); return group; } From 2ac51feb8d1e82908c14b59aef24c6b9d3b4d95f Mon Sep 17 00:00:00 2001 From: Thomas Naughton Date: Tue, 12 Mar 2019 14:22:10 -0400 Subject: [PATCH 94/94] fix bad merge to avoid old header include --- ompi/communicator/comm_init.c | 1 - ompi/win/win.c | 1 - 2 files changed, 2 deletions(-) diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index 9c49270fa75..464c6b99727 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -35,7 +35,6 @@ #include "opal/util/bit_ops.h" #include "opal/util/info_subscriber.h" -#include "opal/util/string_copy.h" #include "opal/util/memprof.h" #include "opal/mca/pmix/pmix.h" #include "ompi/constants.h" diff --git a/ompi/win/win.c b/ompi/win/win.c index 3eb0207ee93..4277758f5b1 100644 --- a/ompi/win/win.c +++ b/ompi/win/win.c @@ -27,7 +27,6 @@ #include "ompi_config.h" #include "opal/util/info_subscriber.h" -#include "opal/util/string_copy.h" #include "opal/util/memprof.h" #include "mpi.h"