Skip to content

Commit

Permalink
func(omp): make repeated reduce function more flexible (#125)
Browse files Browse the repository at this point in the history
* func(omp): set number of threads from input for repeated reduce

* func(omp): make repeated reduce function more flexible
  • Loading branch information
csegarragonz authored May 13, 2024
1 parent 23da49b commit 4d9f40c
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 16 deletions.
4 changes: 1 addition & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
build-wasm/
build-native/
build-native-shared/
build-*/
compile_commands.json
.clangd/

Expand Down
73 changes: 60 additions & 13 deletions func/omp/repeated_reduce.cpp
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
#include <faasm/input.h>
#include <faasm/shared_mem.h>

#include <cstdio>
#include <math.h>
#include <omp.h>
#include <unistd.h>
#include <vector>

bool doReduce()
// This reduce method is called with a varying number of threads, but with
// a maximum of 10. In addition, the inner parallel for pragma may be
// elastically scaled from nThreads, all the way up to 10.
bool doReduce(int numThreads)
{
int nThreads = 10;
int chunkSize = 1000;
int loopSize = nThreads * chunkSize;
int counts[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
int loopSize = numThreads * chunkSize;
int maxNumThreads = 10;
std::vector<int> counts(maxNumThreads, 0);

int reducedA = 0;
int reducedB = 0;
Expand All @@ -20,7 +25,7 @@ bool doReduce()
FAASM_REDUCE(reducedA, FAASM_TYPE_INT, FAASM_OP_SUM)
FAASM_REDUCE(reducedB, FAASM_TYPE_INT, FAASM_OP_SUM)

#pragma omp parallel for num_threads(nThreads) default(none) \
#pragma omp parallel for num_threads(numThreads) default(none) \
shared(counts, loopSize, success) reduction(+ : reducedA, reducedB)
for (int i = 0; i < loopSize; i++) {
int threadNum = omp_get_thread_num();
Expand Down Expand Up @@ -49,17 +54,53 @@ bool doReduce()
return 1;
}

// Check counts
for (int t = 0; t < nThreads; t++) {
if (counts[t] != chunkSize) {
printf(
"Loop count for thread %i: %i != %i\n", t, counts[t], chunkSize);
// First, work out how many threads actually executed the loop, by checking
// how many threads wrote to the counts array
int actualNumThreads = 0;
for (int i = 0; i < counts.size(); i++) {
if (counts.at(i) != 0) {
actualNumThreads++;
}
}

if ((actualNumThreads < numThreads) || (actualNumThreads > maxNumThreads)) {
printf("Actual number of threads outside valid range: %i \\not \\in "
"[%i, %i]\n",
actualNumThreads,
numThreads,
maxNumThreads);

// Exit fast in this case as posterior checks may seg-fault
return false;
}

// Check counts (only count the aggregate, and a uniform distribution, as
// we may elastically change the parallelism of the loop)
int actualChunkSize = (int)loopSize / actualNumThreads;
int total = 0;
for (int tNum = 0; tNum < actualNumThreads; tNum++) {
if (counts[tNum] != actualChunkSize) {
printf("Loop count for thread %i: %i != %i\n",
tNum,
counts[tNum],
actualChunkSize);
success = false;
}

total += counts[tNum];
}

int expectedFinalReducedA = 550000;
int expectedFinalReducedB = 825000;
if (total != loopSize) {
printf("Total loop count failed: %i != %i\n", total, loopSize);
success = false;
}

// The expected final value is: constant (10/15) * (sum [1, nThreads]) *
// chunkSize
int expectedFinalReducedA =
(int)10 * actualNumThreads * (actualNumThreads + 1) / 2 * actualChunkSize;
int expectedFinalReducedB =
(int)15 * actualNumThreads * (actualNumThreads + 1) / 2 * actualChunkSize;

if (reducedA != expectedFinalReducedA) {
printf("reducedA %i != %i\n", reducedA, expectedFinalReducedA);
Expand All @@ -76,10 +117,16 @@ bool doReduce()

int main(int argc, char* argv[])
{
int numThreads = faasm::getIntInput();
if (numThreads <= 0) {
printf("Incorrect number of threads passed as input: %i\n", numThreads);
return 1;
}

// Run reduce in a loop and check each iteration is correct
int nLoops = 10;
for (int i = 0; i < nLoops; i++) {
bool success = doReduce();
bool success = doReduce(numThreads);
if (!success) {
printf("Repeated reduce failed on loop %i\n", i);
return 1;
Expand Down

0 comments on commit 4d9f40c

Please sign in to comment.