Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tjn congest onlytimer #10

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions ompi/mpi/c/alltoall.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
{
int err;
size_t recvtype_size;
opal_timer_t timer = 0; /* SPC */

SPC_RECORD(OMPI_SPC_ALLTOALL, 1);

Expand Down Expand Up @@ -106,10 +107,15 @@ int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype,

OPAL_CR_ENTER_LIBRARY();

SPC_TIMER_START(OMPI_SPC_TIME_ALLTOALL, &timer);

/* Invoke the coll component to perform the back-end operation */
err = comm->c_coll->coll_alltoall(sendbuf, sendcount, sendtype,
recvbuf, recvcount, recvtype,
comm, comm->c_coll->coll_alltoall_module);

SPC_TIMER_STOP(OMPI_SPC_TIME_ALLTOALL, &timer);

OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME);
}

5 changes: 5 additions & 0 deletions ompi/mpi/c/alltoallv.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ int MPI_Alltoallv(const void *sendbuf, const int sendcounts[],
MPI_Datatype recvtype, MPI_Comm comm)
{
int i, size, err;
opal_timer_t timer = 0; /* SPC */

SPC_RECORD(OMPI_SPC_ALLTOALLV, 1);

Expand Down Expand Up @@ -125,10 +126,14 @@ int MPI_Alltoallv(const void *sendbuf, const int sendcounts[],

OPAL_CR_ENTER_LIBRARY();

SPC_TIMER_START(OMPI_SPC_TIME_ALLTOALLV, &timer);

/* Invoke the coll component to perform the back-end operation */
err = comm->c_coll->coll_alltoallv(sendbuf, sendcounts, sdispls, sendtype,
recvbuf, recvcounts, rdispls, recvtype,
comm, comm->c_coll->coll_alltoallv_module);
SPC_TIMER_STOP(OMPI_SPC_TIME_ALLTOALLV, &timer);

OMPI_ERRHANDLER_RETURN(err, comm, err, FUNC_NAME);
}

72 changes: 72 additions & 0 deletions ompi/runtime/ompi_spc.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ static ompi_spc_event_t ompi_spc_events_names[OMPI_SPC_NUM_COUNTERS] = {
SET_COUNTER_ARRAY(OMPI_SPC_TESTALL, "The number of times MPI_Testall was called."),
SET_COUNTER_ARRAY(OMPI_SPC_TESTANY, "The number of times MPI_Testany was called."),
SET_COUNTER_ARRAY(OMPI_SPC_TESTSOME, "The number of times MPI_Testsome was called."),
SET_COUNTER_ARRAY(OMPI_SPC_TIME_ALLTOALL, "The number microseconds spent performing the MPI_Alltoall operation. Note: The timer used on the back end is in cycles, which could potentially be problematic on a system where the clock frequency can change. On such a system, this counter could be inaccurate since we assume a fixed clock rate."),
SET_COUNTER_ARRAY(OMPI_SPC_TIME_ALLTOALLV, "The number microseconds spent performing the MPI_Alltoallv operation. Note: The timer used on the back end is in cycles, which could potentially be problematic on a system where the clock frequency can change. On such a system, this counter could be inaccurate since we assume a fixed clock rate."),
SET_COUNTER_ARRAY(OMPI_SPC_WAIT, "The number of times MPI_Wait was called."),
SET_COUNTER_ARRAY(OMPI_SPC_WAITALL, "The number of times MPI_Waitall was called."),
SET_COUNTER_ARRAY(OMPI_SPC_WAITANY, "The number of times MPI_Waitany was called."),
Expand Down Expand Up @@ -331,6 +333,8 @@ void ompi_spc_init(void)

/* If this is a timer event, set the corresponding timer_event entry */
SET_SPC_BIT(ompi_spc_timer_event, OMPI_SPC_MATCH_TIME);
SET_SPC_BIT(ompi_spc_timer_event, OMPI_SPC_TIME_ALLTOALL);
SET_SPC_BIT(ompi_spc_timer_event, OMPI_SPC_TIME_ALLTOALLV);

opal_argv_free(arg_strings);
}
Expand Down Expand Up @@ -403,6 +407,74 @@ static void ompi_spc_dump(void)
ompi_spc_comm->c_coll->coll_barrier(ompi_spc_comm, ompi_spc_comm->c_coll->coll_barrier_module);
}


/*
 * Congestion - helper function for checking diff w/ SPCs
 *
 * Given a specific SPC name and a prior value, look up the counter's
 * current value and report the difference between the current and the
 * prior values (diff = new - prev).
 *
 * Parameters:
 *   spc_name        name of the SPC to look up (compared with strcmp
 *                   against ompi_spc_events[i].name); must not be NULL.
 *   spc_prev_value  value obtained from an earlier call.
 *   spc_new_value   out: receives the counter's current value; must not
 *                   be NULL.
 *   spc_diff        out (optional): receives new - prev.  Pass NULL if
 *                   only the new value is wanted.
 *
 * Note: counters flagged in ompi_spc_timer_event are divided by
 * sys_clock_freq_mhz before being reported; all other counters are
 * reported as stored.
 *
 * On success, return MPI_SUCCESS, otherwise return -1 (SPC system not
 * available, invalid argument, unknown counter name, or a timer counter
 * that cannot be converted because sys_clock_freq_mhz is zero).  The
 * output parameters are left untouched on failure.
 */
int ompi_spc_value_diff(char *spc_name,
                        long long spc_prev_value,
                        long long *spc_new_value,
                        long long *spc_diff)
{
    int i;
    long long value;

    /* Both the lookup key and the primary output are mandatory. */
    if (NULL == spc_name || NULL == spc_new_value) {
        return -1;
    }

    /* SPC system not available. */
    if (NULL == ompi_spc_events) {
        return -1;
    }

    /* Find the index of the given SPC. */
    for (i = 0; i < OMPI_SPC_NUM_COUNTERS; i++) {
        if (0 != strcmp(ompi_spc_events[i].name, spc_name)) {
            continue;
        }

        /*
         * Read the counter unconditionally: previously the read lived
         * inside the timer-only branch, so every non-timer counter was
         * reported as -1.
         */
        value = (long long) ompi_spc_events[i].value;

        /*
         * TJN: Not using SPC_CYCLES_TO_USECS() macro b/c it
         *      appears to have side-effects. :-/
         */
        if (IS_SPC_BIT_SET(ompi_spc_timer_event, i)) {
            /* Integer division by zero is undefined behavior; refuse
             * to convert rather than crash. */
            if (0 == sys_clock_freq_mhz) {
                return -1;
            }
            value = value / sys_clock_freq_mhz;
        }

        *spc_new_value = value;

        if (NULL != spc_diff) {
            *spc_diff = value - spc_prev_value;
        }

        return MPI_SUCCESS;
    }

    printf("Error: Failed to find SPC counter '%s'\n", spc_name);
    return -1;
}


/* Frees any dynamically allocated OMPI SPC data structures */
void ompi_spc_fini(void)
{
Expand Down
4 changes: 4 additions & 0 deletions ompi/runtime/ompi_spc.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ typedef enum ompi_spc_counters {
OMPI_SPC_TESTALL,
OMPI_SPC_TESTANY,
OMPI_SPC_TESTSOME,
OMPI_SPC_TIME_ALLTOALL,
OMPI_SPC_TIME_ALLTOALLV,
OMPI_SPC_WAIT,
OMPI_SPC_WAITALL,
OMPI_SPC_WAITANY,
Expand Down Expand Up @@ -187,6 +189,8 @@ void ompi_spc_user_or_mpi(int tag, ompi_spc_value_t value, unsigned int user_enu
void ompi_spc_cycles_to_usecs(ompi_spc_value_t *cycles);
void ompi_spc_update_watermark(unsigned int watermark_enum, unsigned int value_enum);

/* Look up the SPC named spc_name and report a value for it through
 * *spc_new_value.  If spc_diff is non-NULL, it additionally receives the
 * reported value minus spc_prev_value.  Timer counters are divided by the
 * clock frequency in MHz before being reported.
 * Returns MPI_SUCCESS on success, or -1 if the SPC system is not available
 * or no counter with that name exists.
 */
int ompi_spc_value_diff(char *spc_name, long long spc_prev_value, long long *spc_new_value, long long *spc_diff);

/* Macros for using the SPC utility functions throughout the codebase.
* If SPC_ENABLE is not 1, the macros become no-ops.
*/
Expand Down