From ef1e0642e8b58f079634c71165bfc62fa4d1a40e Mon Sep 17 00:00:00 2001
From: Nabib Ahmed <nahmed3536@meta.com>
Date: Tue, 4 Apr 2023 21:58:26 -0700
Subject: [PATCH 1/5] timeout threshold

---
 gpu_burn-drv.cpp | 80 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 72 insertions(+), 8 deletions(-)
diff --git a/gpu_burn-drv.cpp b/gpu_burn-drv.cpp
index 53e32fa..c24f6ec 100644
--- a/gpu_burn-drv.cpp
+++ b/gpu_burn-drv.cpp
@@ -28,7 +28,7 @@
  */
 
 // Matrices are SIZE*SIZE..  POT should be efficiently implemented in CUBLAS
-#define SIZE 8192ul 
+#define SIZE 8192ul
 #define USEMEM 0.9 // Try to allocate 90% of memory
 #define COMPARE_KERNEL "compare.ptx"
 
@@ -37,6 +37,8 @@
 //#define OPS_PER_MUL 17188257792ul // Measured for SIZE = 2048
 #define OPS_PER_MUL 1100048498688ul // Extrapolated for SIZE = 8192
 
+#include <algorithm>
+#include <chrono>
 #include <cstdio>
 #include <cstring>
 #include <errno.h>
@@ -53,6 +55,8 @@
 #include <unistd.h>
 #include <vector>
 
+#define SIGTERM_TIMEOUT_THRESHOLD_SECS 30 // number of seconds for sigterm to kill child processes before forcing a sigkill
+
 #include "cublas_v2.h"
 #define CUDA_ENABLE_DEPRECATED
 #include <cuda.h>
@@ -492,7 +496,7 @@ void updateTemps(int handle, std::vector<int> *temps) {
 }
 
 void listenClients(std::vector<int> clientFd, std::vector<pid_t> clientPid,
-                   int runTime) {
+                   int runTime, std::chrono::seconds sigterm_timeout_threshold_secs) {
     fd_set waitHandles;
 
     pid_t tempPid;
@@ -647,7 +651,7 @@ void listenClients(std::vector<int> clientFd, std::vector<pid_t> clientPid,
             break;
     }
 
-    printf("\nKilling processes.. ");
+    printf("\nKilling processes with SIGTERM (soft kill)\n");
     fflush(stdout);
     for (size_t i = 0; i < clientPid.size(); ++i)
         kill(clientPid.at(i), SIGTERM);
@@ -655,6 +659,54 @@ void listenClients(std::vector<int> clientFd, std::vector<pid_t> clientPid,
     kill(tempPid, SIGTERM);
     close(tempHandle);
 
+    // check each process, see if they are all alive until time threshold, then force kill if still alive
+    auto start = std::chrono::steady_clock::now();
+    auto stop = std::chrono::steady_clock::now();
+    auto duration = std::chrono::duration_cast<std::chrono::seconds>(stop - start);
+    std::vector<int> killed_processes; // track the number of killed processes
+    while (duration <= sigterm_timeout_threshold_secs) {
+        for (size_t i = 0; i < clientPid.size(); ++i) {
+            int status;
+            pid_t return_pid = waitpid(clientPid.at(i), &status, WNOHANG);
+            if (return_pid == clientPid.at(i)) {
+                /* child is finished. exit status in status */
+                killed_processes.push_back(return_pid);
+            }
+        }
+        int status;
+        pid_t return_pid = waitpid(tempPid, &status, WNOHANG);
+        if (return_pid == tempPid) {
+                /* child is finished. exit status in status */
+                killed_processes.push_back(return_pid);
+            }
+
+        // number of killed process should be number GPUs + 1 (need to add tempPid process) to exit while loop early
+        if (killed_processes.size() == clientPid.size() + 1) {
+            break;
+        }
+        sleep(1); // check if processes are alive every 1 second
+
+        // update the duration
+        stop = std::chrono::steady_clock::now();
+        duration = std::chrono::duration_cast<std::chrono::seconds>(stop - start);
+    }
+
+    // if duration exceeds time, do a sigkill
+    if (duration > sigterm_timeout_threshold_secs) {
+        printf("\nKilling processes with SIGKILL (force kill) ... \n");
+        fflush(stdout);
+
+        for (size_t i = 0; i < clientPid.size(); ++i) {
+            // check if pid was already killed with SIGTERM before using SIGKILL
+            if (std::find(killed_processes.begin(), killed_processes.end(), clientPid.at(i)) == killed_processes.end())
+                kill(clientPid.at(i), SIGKILL);
+        }
+
+        // check if pid was already killed with SIGTERM before using SIGKILL
+        if (std::find(killed_processes.begin(), killed_processes.end(), tempPid) == killed_processes.end())
+            kill(tempPid, SIGKILL);
+    }
+
     while (wait(NULL) != -1)
         ;
     printf("done\n");
@@ -666,7 +718,8 @@ void listenClients(std::vector<int> clientFd, std::vector<pid_t> clientPid,
 
 template <class T>
 void launch(int runLength, bool useDoubles, bool useTensorCores,
-            ssize_t useBytes, int device_id, const char * kernelFile) {
+            ssize_t useBytes, int device_id, const char * kernelFile,
+            std::chrono::seconds sigterm_timeout_threshold_secs) {
     system("nvidia-smi -L");
 
     // Initting A and B with random data
@@ -705,7 +758,7 @@ void launch(int runLength, bool useDoubles, bool useTensorCores,
             close(mainPipe[1]);
             int devCount;
             read(readMain, &devCount, sizeof(int));
-            listenClients(clientPipes, clientPids, runLength);
+            listenClients(clientPipes, clientPids, runLength, sigterm_timeout_threshold_secs);
         }
         for (size_t i = 0; i < clientPipes.size(); ++i)
             close(clientPipes.at(i));
@@ -756,7 +809,7 @@ void launch(int runLength, bool useDoubles, bool useTensorCores,
                     }
                 }
 
-                listenClients(clientPipes, clientPids, runLength);
+                listenClients(clientPipes, clientPids, runLength, sigterm_timeout_threshold_secs);
             }
         }
         for (size_t i = 0; i < clientPipes.size(); ++i)
@@ -779,6 +832,8 @@ void showHelp() {
     printf("-i N\tExecute only on GPU N\n");
     printf("-c FILE\tUse FILE as compare kernel.  Default is %s\n",
            COMPARE_KERNEL);
+    printf("-stts T\tSet timeout threshold to T seconds for using SIGTERM to abort child processes before using SIGKILL.  Default is %d\n",
+           SIGTERM_TIMEOUT_THRESHOLD_SECS);
     printf("-h\tShow this help message\n\n");
     printf("Examples:\n");
     printf("  gpu-burn -d 3600 # burns all GPUs with doubles for an hour\n");
@@ -809,6 +864,7 @@ int main(int argc, char **argv) {
     ssize_t useBytes = 0; // 0 == use USEMEM% of free mem
     int device_id = -1;
     char *kernelFile = (char *)COMPARE_KERNEL;
+    std::chrono::seconds sigterm_timeout_threshold_secs = std::chrono::seconds(SIGTERM_TIMEOUT_THRESHOLD_SECS);
 
     std::vector<std::string> args(argv, argv + argc);
     for (size_t i = 1; i < args.size(); ++i) {
@@ -885,6 +941,14 @@ int main(int argc, char **argv) {
                 thisParam++;
             }
         }
+        if (argc >= 2 && strcmp(argv[i], "-stts") == 0) { // exact match: a 2-char prefix test also fires on any other "-s*" argument
+            thisParam++;
+
+            if (argv[i + 1]) {
+                sigterm_timeout_threshold_secs = std::chrono::seconds(atoi(argv[i + 1]));
+                thisParam++;
+            }
+        }
     }
 
     if (argc - thisParam < 2)
@@ -896,10 +960,10 @@ int main(int argc, char **argv) {
 
     if (useDoubles)
         launch<double>(runLength, useDoubles, useTensorCores, useBytes,
-                       device_id, kernelFile);
+                       device_id, kernelFile, sigterm_timeout_threshold_secs);
     else
         launch<float>(runLength, useDoubles, useTensorCores, useBytes,
-                      device_id, kernelFile);
+                      device_id, kernelFile, sigterm_timeout_threshold_secs);
 
     return 0;
 }

From 67119053bb2ece1004c53ed837ceee2516790a65 Mon Sep 17 00:00:00 2001
From: Nabib Ahmed <nahmed3536@meta.com>
Date: Tue, 4 Apr 2023 22:08:14 -0700
Subject: [PATCH 2/5] timeout threshold - wait n seconds instead of polling
 every second

---
 gpu_burn-drv.cpp | 55 ++++++++++++++++++++----------------------------
 1 file changed, 23 insertions(+), 32 deletions(-)

diff --git a/gpu_burn-drv.cpp b/gpu_burn-drv.cpp
index c24f6ec..ce3766c 100644
--- a/gpu_burn-drv.cpp
+++ b/gpu_burn-drv.cpp
@@ -51,6 +51,7 @@
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/wait.h>
+#include <thread>
 #include <time.h>
 #include <unistd.h>
 #include <vector>
@@ -657,44 +658,32 @@ void listenClients(std::vector<int> clientFd, std::vector<pid_t> clientPid,
         kill(clientPid.at(i), SIGTERM);
 
     kill(tempPid, SIGTERM);
-    close(tempHandle);
 
-    // check each process, see if they are all alive until time threshold, then force kill if still alive
-    auto start = std::chrono::steady_clock::now();
-    auto stop = std::chrono::steady_clock::now();
-    auto duration = std::chrono::duration_cast<std::chrono::seconds>(stop - start);
+    // processes should be terminated by SIGTERM within threshold time (so wait and then check pids)
+    std::this_thread::sleep_for(sigterm_timeout_threshold_secs);
+
+    // check each process and see if they are alive
     std::vector<int> killed_processes; // track the number of killed processes
-    while (duration <= sigterm_timeout_threshold_secs) {
-        for (size_t i = 0; i < clientPid.size(); ++i) {
-            int status;
-            pid_t return_pid = waitpid(clientPid.at(i), &status, WNOHANG);
-            if (return_pid == clientPid.at(i)) {
-                /* child is finished. exit status in status */
-                killed_processes.push_back(return_pid);
-            }
-        }
+    // loop through pids for each client / GPU
+    for (size_t i = 0; i < clientPid.size(); ++i) {
         int status;
-        pid_t return_pid = waitpid(tempPid, &status, WNOHANG);
-        if (return_pid == tempPid) {
-                /* child is finished. exit status in status */
-                killed_processes.push_back(return_pid);
-            }
-
-        // number of killed process should be number GPUs + 1 (need to add tempPid process) to exit while loop early
-        if (killed_processes.size() == clientPid.size() + 1) {
-            break;
+        pid_t return_pid = waitpid(clientPid.at(i), &status, WNOHANG);
+        if (return_pid == clientPid.at(i)) {
+            /* child is finished. exit status in status */
+            killed_processes.push_back(return_pid);
         }
-        sleep(1); // check if processes are alive every 1 second
-
-        // update the duration
-        stop = std::chrono::steady_clock::now();
-        duration = std::chrono::duration_cast<std::chrono::seconds>(stop - start);
+    }
+    // handle the tempPid
+    int status;
+    pid_t return_pid = waitpid(tempPid, &status, WNOHANG);
+    if (return_pid == tempPid) {
+        /* child is finished. exit status in status */
+        killed_processes.push_back(return_pid);
     }
 
-    // if duration exceeds time, do a sigkill
-    if (duration > sigterm_timeout_threshold_secs) {
-        printf("\nKilling processes with SIGKILL (force kill) ... \n");
-        fflush(stdout);
+    // number of killed process should be number GPUs + 1 (need to add tempPid process) to exit while loop early
+    if (killed_processes.size() != clientPid.size() + 1) {
+        printf("\nKilling processes with SIGKILL (force kill)\n");
 
         for (size_t i = 0; i < clientPid.size(); ++i) {
             // check if pid was already killed with SIGTERM before using SIGKILL
@@ -707,6 +696,8 @@ void listenClients(std::vector<int> clientFd, std::vector<pid_t> clientPid,
             kill(tempPid, SIGKILL);
     }
 
+    close(tempHandle);
+
     while (wait(NULL) != -1)
         ;
     printf("done\n");

From 132677f27bf7b763c40c2f66193326a90a4d3952 Mon Sep 17 00:00:00 2001
From: Nabib Ahmed <nahmed3536@meta.com>
Date: Thu, 11 May 2023 16:23:53 -0700
Subject: [PATCH 3/5] added logger class

---
 gpu_burn-drv.cpp | 129 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)

diff --git a/gpu_burn-drv.cpp b/gpu_burn-drv.cpp
index ac02bbd..390fd37 100644
--- a/gpu_burn-drv.cpp
+++ b/gpu_burn-drv.cpp
@@ -41,12 +41,15 @@
 #include <chrono>
 #include <cstdio>
 #include <cstring>
+#include <ctime>
 #include <errno.h>
 #include <exception>
 #include <fstream>
 #include <map>
 #include <signal.h>
+#include <stdarg.h>
 #include <stdexcept>
+#include <stdio.h>
 #include <string.h>
 #include <string>
 #include <sys/time.h>
@@ -59,6 +62,129 @@
 
 #define SIGTERM_TIMEOUT_THRESHOLD_SECS 30 // number of seconds for sigterm to kill child processes before forcing a sigkill
 
+/**
+ * Minimal levelled logger that prints timestamped messages to stdout.
+ *
+ * Every message is written as "[<timestamp> | <LEVEL>] <text>\n" and is only
+ * emitted when its level is at or above the threshold set via setLevel().
+ * Levels in ascending order: DEBUG, VERBOSE, INFO, WARN, ERROR, NONE; a
+ * threshold of NONE silences every message.
+ * The default threshold is VERBOSE.
+ */
+class Logger {
+    // TIMESTAMP
+    /**
+     * Returns the current time in ctime() format without the trailing line
+     * break. The pointer refers to storage owned by ctime(), which later
+     * ctime()/asctime() calls overwrite, so consume it immediately.
+     */
+    const char* timestamp_str() {
+        time_t now = time(0);
+        char *time_str = ctime(&now);
+
+        // ctime() can return NULL on failure; never hand that to printf("%s")
+        if (time_str == NULL)
+            return "unknown time";
+
+        // removes extraneous line break (strcspn stops at '\n' or at the end)
+        time_str[strcspn(time_str, "\n")] = '\0';
+        return time_str;
+    }
+
+    // LOG MESSAGE TEMPLATE
+    /** Prints the "[timestamp | LEVEL] " prefix that starts every message. */
+    void msg_prefix(int logLevel) {
+        printf("[%s | %s] ", timestamp_str(), logLevels.at(logLevel));
+    }
+
+    /**
+     * Ends the message and flushes stdout, so output shows up promptly when
+     * stdout is a pipe or file and nothing stays buffered where a later
+     * fork() could duplicate it into a child process.
+     */
+    void msg_suffix() {
+        printf("\n");
+        fflush(stdout);
+    }
+
+    /**
+     * Shared implementation of the public logging functions: prints one
+     * message at msgLevel unless the configured threshold filters it out.
+     *
+     * @param msgLevel level of this message (DEBUG .. ERROR)
+     * @param fmt      printf-style format string
+     * @param args     arguments for fmt, already va_start()ed by the caller
+     */
+    void vlog(int msgLevel, const char *fmt, va_list args) {
+        if (LEVEL > msgLevel)
+            return;
+        msg_prefix(msgLevel);
+        vprintf(fmt, args);
+        msg_suffix();
+    }
+
+    protected:
+        // LOG LEVELS
+        const int DEBUG   = 0;
+        const int VERBOSE = 1;
+        const int INFO    = 2;
+        const int WARN    = 3;
+        const int ERROR   = 4;
+        const int NONE    = 5;
+        // display names, indexed by the level constants above
+        const std::vector<const char*> logLevels = {
+            "DEBUG",
+            "VERBOSE",
+            "INFO",
+            "WARN",
+            "ERROR",
+            "NONE",
+        };
+
+        // SET DEFAULT LOG LEVEL
+        int LEVEL = VERBOSE;
+
+    public:
+        /**
+         * Sets the threshold. Out-of-range values are clamped to DEBUG..NONE:
+         * that filters exactly as the raw value would, but keeps
+         * getLogLevels(getLevel()) from throwing std::out_of_range.
+         */
+        void setLevel(int level) {
+            if (level < DEBUG)
+                level = DEBUG;
+            else if (level > NONE)
+                level = NONE;
+            LEVEL = level;
+        }
+
+        /** Returns the current threshold. */
+        int getLevel() {
+            return LEVEL;
+        }
+
+        /** Returns the name of a level; throws std::out_of_range unless 0 <= level <= 5. */
+        const char* getLogLevels(int level) {
+            return logLevels.at(level);
+        }
+
+        // LOG MESSAGE FUNCTIONS
+        /**
+         * printf-style entry points, one per level. Each forwards its variadic
+         * arguments to vlog(), which applies the threshold check and adds the
+         * message prefix and suffix.
+         *
+         * @param fmt printf-style format string, followed by its arguments
+         */
+        void debug(const char *fmt, ...)   { va_list ap; va_start(ap, fmt); vlog(DEBUG, fmt, ap);   va_end(ap); }
+        void verbose(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vlog(VERBOSE, fmt, ap); va_end(ap); }
+        void info(const char *fmt, ...)    { va_list ap; va_start(ap, fmt); vlog(INFO, fmt, ap);    va_end(ap); }
+        void warn(const char *fmt, ...)    { va_list ap; va_start(ap, fmt); vlog(WARN, fmt, ap);    va_end(ap); }
+        void error(const char *fmt, ...)   { va_list ap; va_start(ap, fmt); vlog(ERROR, fmt, ap);   va_end(ap); }
+};
+
+Logger logger; // initialize logger
+
 #include "cublas_v2.h"
 #define CUDA_ENABLE_DEPRECATED
 #include <cuda.h>
@@ -741,8 +867,11 @@ void showHelp() {
            COMPARE_KERNEL);
     printf("-stts T\tSet timeout threshold to T seconds for using SIGTERM to abort child processes before using SIGKILL.  Default is %d\n",
            SIGTERM_TIMEOUT_THRESHOLD_SECS);
+    printf("-L L\tSet the log level L; options are 0 (DEBUG), 1 (VERBOSE), 2 (INFO), 3 (WARN), 4 (ERROR), 5 (NONE).  Default is %s\n",
+            logger.getLogLevels(logger.getLevel()));
     printf("-h\tShow this help message\n\n");
     printf("Examples:\n");
+    printf("  gpu-burn -L 2 -tc 60 # burns all GPUs with tensor core for a minute and log INFO level and higher messages\n");
     printf("  gpu-burn -d 3600 # burns all GPUs with doubles for an hour\n");
     printf(
         "  gpu-burn -m 50%% # burns using 50% of the available GPU memory\n");

From 42b6554b0734fbc2364c3922225cf2563957b943 Mon Sep 17 00:00:00 2001
From: Nabib Ahmed <nahmed3536@meta.com>
Date: Thu, 11 May 2023 16:24:36 -0700
Subject: [PATCH 4/5] added command line

---
 gpu_burn-drv.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/gpu_burn-drv.cpp b/gpu_burn-drv.cpp
index 390fd37..ebf0a8c 100644
--- a/gpu_burn-drv.cpp
+++ b/gpu_burn-drv.cpp
@@ -985,6 +985,14 @@ int main(int argc, char **argv) {
                 thisParam++;
             }
         }
+        if (argc >= 2 && strncmp(argv[i], "-L", 2) == 0) {
+            thisParam++;
+
+            if (argv[i + 1]) {
+                logger.setLevel(atoi(argv[i + 1]));
+                thisParam++;
+            }
+        }
     }
 
     if (argc - thisParam < 2)

From 0ca09de2502272b500f9af4a3f21a998dc5951a1 Mon Sep 17 00:00:00 2001
From: Nabib Ahmed <nahmed3536@meta.com>
Date: Thu, 11 May 2023 16:51:31 -0700
Subject: [PATCH 5/5] log messages done

---
 gpu_burn-drv.cpp | 155 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 112 insertions(+), 43 deletions(-)

diff --git a/gpu_burn-drv.cpp b/gpu_burn-drv.cpp
index ebf0a8c..7d0a06e 100644
--- a/gpu_burn-drv.cpp
+++ b/gpu_burn-drv.cpp
@@ -45,6 +45,7 @@
 #include <errno.h>
 #include <exception>
 #include <fstream>
+#include <iomanip>
 #include <map>
 #include <signal.h>
 #include <stdarg.h>
@@ -52,6 +53,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <string>
+#include <sstream>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/wait.h>
@@ -257,10 +259,10 @@ template <class T> class GPU_Test {
         checkError(cuMemFree(d_Adata), "Free B");
         checkError(cuMemFree(d_Bdata), "Free C");
         cuMemFreeHost(d_faultyElemsHost);
-        printf("Freed memory for dev %d\n", d_devNumber);
+        logger.verbose("Freed memory for dev %d", d_devNumber);
 
         cublasDestroy(d_cublas);
-        printf("Uninitted cublas\n");
+        logger.verbose("Uninitted cublas");
     }
 
     static void termHandler(int signum) { g_running = false; }
@@ -300,7 +302,7 @@ template <class T> class GPU_Test {
         if (useBytes < 0)
             useBytes = (ssize_t)((double)availMemory() * (-useBytes / 100.0));
 
-        printf("Initialized device %d with %lu MB of memory (%lu MB available, "
+        logger.verbose("Initialized device %d with %lu MB of memory (%lu MB available, "
                "using %lu MB of it), %s%s\n",
                d_devNumber, totalMemory() / 1024ul / 1024ul,
                availMemory() / 1024ul / 1024ul, useBytes / 1024ul / 1024ul,
@@ -309,7 +311,7 @@ template <class T> class GPU_Test {
         size_t d_resultSize = sizeof(T) * SIZE * SIZE;
         d_iters = (useBytes - 2 * d_resultSize) /
                   d_resultSize; // We remove A and B sizes
-        printf("Results are %zu bytes each, thus performing %zu iterations\n",
+        logger.verbose("Results are %zu bytes each, thus performing %zu iterations",
                d_resultSize, d_iters);
         if ((size_t)useBytes < 3 * d_resultSize)
             throw std::string("Low mem for result. aborting.\n");
@@ -631,52 +633,54 @@ void listenClients(std::vector<int> clientFd, std::vector<pid_t> clientPid,
 
         // Printing progress (if a child has initted already)
         if (childReport) {
+            std::stringstream progress_stream;
             float elapsed =
                 fminf((float)(thisTime - startTime) / (float)runTime * 100.0f,
                       100.0f);
-            printf("\r%.1f%%  ", elapsed);
-            printf("proc'd: ");
+            progress_stream << "Process Update:\n\tProgress (%): " << std::fixed << std::setprecision(1) << elapsed;
+            progress_stream << "\n\tproc'd      : ";
             for (size_t i = 0; i < clientCalcs.size(); ++i) {
-                printf("%d (%.0f Gflop/s) ", clientCalcs.at(i),
-                       clientGflops.at(i));
+                progress_stream << std::to_string(clientCalcs.at(i));
                 if (i != clientCalcs.size() - 1)
-                    printf("- ");
+                    progress_stream << ", ";
             }
-            printf("  errors: ");
+            progress_stream << "\n\tGflop/s     : ";
+            for (size_t i = 0; i < clientCalcs.size(); ++i) {
+                progress_stream << std::fixed << std::setprecision(1) << clientGflops.at(i);
+                if (i != clientCalcs.size() - 1)
+                    progress_stream << ", ";
+            }
+            progress_stream << "\n\terrors      : ";
             for (size_t i = 0; i < clientErrors.size(); ++i) {
-                std::string note = "%d ";
-                if (clientCalcs.at(i) == -1)
-                    note += " (DIED!)";
-                else if (clientErrors.at(i))
-                    note += " (WARNING!)";
+                progress_stream << clientErrors.at(i);
+                if (clientCalcs.at(i) == -1) {
+                    progress_stream << " (DIED!)";
+                }
+                else if (clientErrors.at(i)) {
+                    progress_stream << " (WARNING!)";
+                }
 
-                printf(note.c_str(), clientErrors.at(i));
-                if (i != clientCalcs.size() - 1)
-                    printf("- ");
+                if (i != clientCalcs.size() - 1) {
+                    progress_stream << ", ";
+                }
             }
-            printf("  temps: ");
+            progress_stream << "\n\ttemps (C)   : ";
             for (size_t i = 0; i < clientTemp.size(); ++i) {
-                printf(clientTemp.at(i) != 0 ? "%d C " : "-- ",
-                       clientTemp.at(i));
-                if (i != clientCalcs.size() - 1)
-                    printf("- ");
+                progress_stream << clientTemp.at(i);
+                if (i != clientCalcs.size() - 1) {
+                    progress_stream << ", ";
+                }
             }
 
-            fflush(stdout);
-
             for (size_t i = 0; i < clientErrors.size(); ++i)
                 if (clientErrors.at(i))
                     clientFaulty.at(i) = true;
 
-            if (nextReport < elapsed) {
+            if (nextReport <= elapsed) {
                 nextReport = elapsed + 10.0f;
-                printf("\n\tSummary at:   ");
-                fflush(stdout);
-                system("date"); // Printing a date
-                fflush(stdout);
-                printf("\n");
                 for (size_t i = 0; i < clientErrors.size(); ++i)
                     clientErrors.at(i) = 0;
+                logger.verbose("%s", progress_stream.str().c_str());
             }
         }
 
@@ -694,7 +698,45 @@ void listenClients(std::vector<int> clientFd, std::vector<pid_t> clientPid,
             break;
     }
 
-    printf("\nKilling processes with SIGTERM (soft kill)\n");
+    // log out the final results
+    std::stringstream progress_stream;
+    progress_stream << "End of GPU Burn Results:\n\tProgress (%): 100";
+    progress_stream << "\n\tproc'd      : ";
+    for (size_t i = 0; i < clientCalcs.size(); ++i) {
+        progress_stream << std::to_string(clientCalcs.at(i));
+        if (i != clientCalcs.size() - 1)
+            progress_stream << ", ";
+    }
+    progress_stream << "\n\tGflop/s     : ";
+    for (size_t i = 0; i < clientCalcs.size(); ++i) {
+        progress_stream << std::fixed << std::setprecision(1) << clientGflops.at(i);
+        if (i != clientCalcs.size() - 1)
+            progress_stream << ", ";
+    }
+    progress_stream << "\n\terrors      : ";
+    for (size_t i = 0; i < clientErrors.size(); ++i) { // per-client error count plus a status tag
+        progress_stream << clientErrors.at(i);
+        if (clientCalcs.at(i) == -1) {
+            progress_stream << " (DIED!)";
+        }
+        else if (clientErrors.at(i)) {
+            progress_stream << " (WARNING!)"; // client is alive but reported errors, same tag as the periodic report
+        }
+        if (i != clientCalcs.size() - 1) {
+            progress_stream << ", ";
+        }
+    }
+    progress_stream << "\n\ttemps (C)   : ";
+    for (size_t i = 0; i < clientTemp.size(); ++i) {
+        progress_stream << clientTemp.at(i);
+        if (i != clientCalcs.size() - 1) {
+            progress_stream << ", ";
+        }
+    }
+
+    logger.verbose("%s", progress_stream.str().c_str());
+
+    logger.verbose("Killing processes with SIGTERM (soft kill)");
     fflush(stdout);
     for (size_t i = 0; i < clientPid.size(); ++i)
         kill(clientPid.at(i), SIGTERM);
@@ -725,7 +767,7 @@ void listenClients(std::vector<int> clientFd, std::vector<pid_t> clientPid,
 
     // number of killed process should be number GPUs + 1 (need to add tempPid process) to exit while loop early
     if (killed_processes.size() != clientPid.size() + 1) {
-        printf("\nKilling processes with SIGKILL (force kill)\n");
+        logger.verbose("Killing (remaining) processes with SIGKILL (force kill)");
 
         for (size_t i = 0; i < clientPid.size(); ++i) {
             // check if pid was already killed with SIGTERM before using SIGKILL
@@ -742,18 +784,42 @@ void listenClients(std::vector<int> clientFd, std::vector<pid_t> clientPid,
 
     while (wait(NULL) != -1)
         ;
-    printf("done\n");
+    logger.verbose("Killed all the jobs.");
 
-    printf("\nTested %d GPUs:\n", (int)clientPid.size());
+    logger.info("Tested %d GPUs:", (int)clientPid.size());
     for (size_t i = 0; i < clientPid.size(); ++i)
-        printf("\tGPU %d: %s\n", (int)i, clientFaulty.at(i) ? "FAULTY" : "OK");
+        logger.info("GPU %d: %s", (int)i, clientFaulty.at(i) ? "FAULTY" : "OK");
+}
+
+/**
+ * Runs `command` through the shell via popen() and captures its stdout.
+ *
+ * @param command shell command line to execute
+ * @return the captured output, or "popen failed!" if popen() returned NULL
+ */
+std::string exec(std::string command) {
+    char buffer[2048];
+    std::string result;
+
+    FILE* pipe = popen(command.c_str(), "r");
+    if (!pipe) {
+        return "popen failed!";
+    }
+
+    // fgets() yields NULL on end-of-file and on a read error alike, so the loop
+    // always terminates; polling feof() instead could spin forever on an error
+    while (fgets(buffer, sizeof(buffer), pipe) != NULL)
+        result += buffer;
+
+    pclose(pipe);
+    return result;
+}
 
 template <class T>
 void launch(int runLength, bool useDoubles, bool useTensorCores,
             ssize_t useBytes, int device_id, const char * kernelFile,
             std::chrono::seconds sigterm_timeout_threshold_secs) {
-    system("nvidia-smi -L");
+    logger.verbose("NVIDIA-SMI Output:\n%s", exec("nvidia-smi -L").c_str());
 
     // Initting A and B with random data
     T *A = (T *)malloc(sizeof(T) * SIZE * SIZE);
@@ -996,18 +1062,21 @@ int main(int argc, char **argv) {
     }
 
     if (argc - thisParam < 2)
-        printf("Run length not specified in the command line. ");
+        logger.warn("Run length not specified in the command line.");
     else
         runLength = atoi(argv[1 + thisParam]);
-    printf("Using compare file: %s\n", kernelFile);
-    printf("Burning for %d seconds.\n", runLength);
+    logger.verbose("Using compare file: %s", kernelFile);
+    logger.verbose("Burning for %d seconds.", runLength);
 
-    if (useDoubles)
+    if (useDoubles) {
+        logger.verbose("Launching with doubles");
         launch<double>(runLength, useDoubles, useTensorCores, useBytes,
                        device_id, kernelFile, sigterm_timeout_threshold_secs);
-    else
+    }
+    else {
+        logger.verbose("Launching with floats");
         launch<float>(runLength, useDoubles, useTensorCores, useBytes,
                       device_id, kernelFile, sigterm_timeout_threshold_secs);
-
+    }
     return 0;
 }