Skip to content

Commit

Permalink
contrib: add SPC example (Alltoall looper)
Browse files Browse the repository at this point in the history
Signed-off-by: Thomas Naughton <[email protected]>
  • Loading branch information
naughtont3 committed Nov 30, 2022
1 parent b4bd007 commit ef257b6
Show file tree
Hide file tree
Showing 2 changed files with 238 additions and 0 deletions.
45 changes: 45 additions & 0 deletions contrib/spc-a2a-looper/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
Example SPC Alltoall Looper
---------------------------

Simple example that loops over `MPI_Alltoall()`. It tests
the `OMPI_SPC_TIME_ALLTOALL` counter. The test calculates
the diff per-rank at the App level and shows this info each
loop iteration (at all ranks).

The counter accumulates and shows the total time for all Alltoall calls,
while the per-rank view shows the diff for each loop.

Pre-reqs
--------
- Patch with `OMPI_SPC_TIME_ALLTOALL` counter
- OMPI build with `--enable-spc`

Usage
-----

```sh
mpirun -np $nprocs ./a2a_looper [N]

# (optional) arg1 - positive-integer for number of loops
```

Example
-------

Run for just 9 loops:

```sh
mpirun \
-np 4 \
--mca mpi_spc_attach OMPI_SPC_TIME_ALLTOALL \
--mca mpi_spc_dump_enabled true \
./a2a_looper 9
```

Notes
-----
- Runs of 20 loops or fewer print every loop;
above that, output is printed every 10th loop.

- Initial SPC code bits adapted from `ompi/examples/spc_example.c`

193 changes: 193 additions & 0 deletions contrib/spc-a2a-looper/a2a_looper.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
/*
* Tue Nov 29 2022 Thomas Naughton <[email protected]>
*
* Loops over MPI_Alltoall() 'MAX_NLOOP' times.
*
* Usage: mpirun -np $nprocs ./a2a_looper [N]
*
* Optional position-sensitive argument:
* arg1 - positive-integer for number of loops
*
* If no args are provided the program uses default values.
*
* Note: Initial SPC code bits adapted from 'ompi/examples/spc_example.c'
*
* TJN: Modified to have only one counter (OMPI_SPC_TIME_ALLTOALL),
* also we calculate the diff per-rank at the App level and show
* this info each run (at all ranks).
*/
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <mpi.h>

/* Number of MPI_Alltoall() iterations to run; main() replaces this default
 * with the value of argv[1] when an argument is given. */
int MAX_NLOOP = 100;

/*
 * Loops over MPI_Alltoall() MAX_NLOOP times. After every iteration each rank
 * reads the "runtime_spc_OMPI_SPC_TIME_ALLTOALL" MPI_T performance variable,
 * computes the maximum across ranks, asks ompi_spc_value_diff() for the
 * per-iteration difference, and (periodically) prints all of it.
 *
 * argv[1] (optional): positive integer overriding the default loop count.
 *
 * Returns EXIT_SUCCESS when every iteration completes, EXIT_FAILURE on a
 * usage error or when MPI_Alltoall() reports a failure. Errors that only one
 * rank can observe (allocation failure, MPI_T failures, missing counter)
 * call MPI_Abort() so the remaining ranks are not left blocked in a
 * collective.
 */
int main (int argc, char **argv)
{
    /* (See also: ompi_spc_counters_t for the list of counters) */
    const char *counter_name = "runtime_spc_OMPI_SPC_TIME_ALLTOALL";
    const int num_elem = 1024;   /* ints exchanged with each peer */
    const int usecs = 250000;    /* 0.25 sec pause after each printed line */

    int rank, size;
    int *inbuf = NULL;
    int *outbuf = NULL;
    int rc;
    int provided, num;
    int found = 0;
    int exit_status = EXIT_SUCCESS;
    long long value = 0;
    long long past_value = 0;
    MPI_T_pvar_handle handle;
    MPI_T_pvar_session session;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (argc > 1) {
        char *end = NULL;
        long requested;

        /* strtol (unlike atoi) lets us reject garbage, zero, negative and
         * out-of-range loop counts instead of silently running no loops. */
        errno = 0;
        requested = strtol(argv[1], &end, 10);
        if (end == argv[1] || '\0' != *end || ERANGE == errno
            || requested <= 0 || requested > INT_MAX) {
            /* Assumes every rank receives the same argv from the launcher,
             * so all ranks take this branch together. */
            if (0 == rank) {
                fprintf(stderr,
                        "Usage: %s [N]  (N: positive integer number of loops)\n",
                        argv[0]);
            }
            MPI_Finalize();
            return EXIT_FAILURE;
        }
        MAX_NLOOP = (int) requested;
    }

    MPI_T_init_thread(MPI_THREAD_SINGLE, &provided);

    /* Locate the MPI_T pvar index of the OMPI_SPC_TIME_ALLTOALL counter */
    MPI_T_pvar_get_num(&num);

    rc = MPI_T_pvar_session_create(&session);
    if (MPI_SUCCESS != rc) {
        fprintf(stderr, "ERROR: MPI_T_pvar_session_create failed (rc=%d)\n", rc);
        MPI_Abort(MPI_COMM_WORLD, -1);
    }

    for (int i = 0; i < num; i++) {
        char name[256], description[256];
        int name_len = (int) sizeof(name);
        int desc_len = (int) sizeof(description);
        int verbosity, bind, var_class, readonly, continuous, atomic, count;
        MPI_Datatype datatype;
        MPI_T_enum enumtype;

        rc = PMPI_T_pvar_get_info(i, name, &name_len, &verbosity,
                                  &var_class, &datatype, &enumtype,
                                  description, &desc_len, &bind,
                                  &readonly, &continuous, &atomic);
        if (MPI_SUCCESS != rc) {
            continue;
        }
        if (0 != strcmp(name, counter_name)) {
            continue;
        }

        /* Create the handle for the counter; without it nothing below works. */
        rc = MPI_T_pvar_handle_alloc(session, i, NULL, &handle, &count);
        if (MPI_SUCCESS != rc) {
            fprintf(stderr, "ERROR: MPI_T_pvar_handle_alloc failed for %s (rc=%d)\n",
                    counter_name, rc);
            MPI_Abort(MPI_COMM_WORLD, -1);
        }

        /* The MPI standard has MPI_T_pvar_start return an error for
         * continuous variables (they are always running), so a failure
         * here is deliberately not treated as fatal. */
        (void) MPI_T_pvar_start(session, handle);

        found = 1;
        break;   /* one handle is enough; do not allocate a second one */
    }

    /* Make sure we found the counter */
    if (0 == found) {
        fprintf(stderr, "ERROR: Couldn't find the appropriate SPC counter in the MPI_T pvars.\n");
        MPI_Abort(MPI_COMM_WORLD, -1);
    }

    /* Widen before multiplying so the element count cannot overflow int. */
    const size_t total = (size_t) size * (size_t) num_elem;

    /* A malloc failure is local to this rank; abort the whole job rather
     * than leaving the peers waiting in the barriers below. */
    inbuf = malloc(total * sizeof *inbuf);
    if (NULL == inbuf) {
        fprintf(stderr, "Error: malloc failed (inbuf)\n");
        MPI_Abort(MPI_COMM_WORLD, -1);
    }

    outbuf = malloc(total * sizeof *outbuf);
    if (NULL == outbuf) {
        fprintf(stderr, "Error: malloc failed (outbuf)\n");
        MPI_Abort(MPI_COMM_WORLD, -1);
    }

    for (size_t k = 0; k < total; k++) {
        inbuf[k] = 100 + rank;
        outbuf[k] = 0;
    }

    MPI_Barrier(MPI_COMM_WORLD);

    for (int nloop = 0; nloop < MAX_NLOOP; nloop++) {
        long long tmp_max = 0;
        int global_rc = 0;
        long long new_value = 0;
        long long diff = 0;

        MPI_Barrier(MPI_COMM_WORLD);
        fflush(NULL);

        rc = MPI_Alltoall(inbuf, num_elem, MPI_INT, outbuf, num_elem, MPI_INT, MPI_COMM_WORLD);

        /* Check if alltoall had any problems. After the Allreduce every
         * rank holds the same global_rc, so every rank must leave the loop
         * together; only rank 0 reports it. */
        MPI_Allreduce(&rc, &global_rc, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
        if (0 != global_rc) {
            if (0 == rank) {
                fprintf(stderr, "Error: Alltoall failed! (rc=%d)\n", global_rc);
            }
            exit_status = EXIT_FAILURE;
            break;
        }

        /* A failed read is rank-local, so abort instead of skipping the
         * collective that follows. */
        rc = MPI_T_pvar_read(session, handle, &value);
        if (MPI_SUCCESS != rc) {
            fprintf(stderr, "ERROR: MPI_T_pvar_read failed for %s (rc=%d)\n",
                    counter_name, rc);
            MPI_Abort(MPI_COMM_WORLD, -1);
        }
        MPI_Allreduce(&value, &tmp_max, 1, MPI_LONG_LONG, MPI_MAX, MPI_COMM_WORLD);

        /* NOTE(review): ompi_spc_value_diff() is not declared by any header
         * included in this file; presumably it is provided by the
         * OMPI_SPC_TIME_ALLTOALL patch. Confirm its prototype and the
         * meaning of its return value (currently ignored). */
        rc = ompi_spc_value_diff("OMPI_SPC_TIME_ALLTOALL",
                                 past_value,
                                 &new_value,
                                 &diff);

        MPI_Barrier(MPI_COMM_WORLD);

        /* Short runs print every loop; longer runs print every 10th loop. */
        if ((MAX_NLOOP <= 20) || (0 == (nloop % 10))) {
            printf("%12s: Rank: %5d Size: %5d Loop: %8d %s: %lld max: %lld prev_value: %lld new_value: %lld diff: %lld -- SLEEP: %dus\n",
                   "a2a_looper", rank, size, nloop, counter_name, value, tmp_max, past_value, new_value, diff, usecs);
            usleep(usecs);
        }

        past_value = new_value;

        fflush(NULL);
        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Stop the counter, free the handle, and then free the session.
     * As with start, stop may report an error for a continuous variable. */
    (void) MPI_T_pvar_stop(session, handle);
    MPI_T_pvar_handle_free(session, &handle);
    MPI_T_pvar_session_free(&session);

    free(inbuf);
    free(outbuf);

    MPI_T_finalize();
    MPI_Finalize();

    return exit_status;
}

0 comments on commit ef257b6

Please sign in to comment.