From b7a570ad5a03ea321f2c7cb1d2b454bffe292a41 Mon Sep 17 00:00:00 2001
From: Courtney Darville <courtneydarville94@outlook.com>
Date: Thu, 1 Dec 2022 13:45:37 +1100
Subject: [PATCH] Early Processing Methodology in More Benchmarks

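Add the early processing methodology (running sum and sum of squares
instead of per-run samples) to the fault, hardware, irquser, scheduler
and sync benchmarks. getMinOverhead() is now declared in
libsel4benchsupport and takes the number of samples as a parameter.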

Signed-off-by: Courtney Darville <courtneydarville94@outlook.com>
---
 apps/fault/src/main.c                   | 95 +++++++++++++++++++++++++
 apps/hardware/src/main.c                | 20 ++++++
 apps/irquser/src/main.c                 | 87 ++++++++++++++++++++++
 apps/scheduler/src/main.c               | 77 ++++++++++++++++++++
 apps/sel4bench/src/fault.c              | 16 +++++
 apps/sel4bench/src/hardware.c           |  5 ++
 apps/sel4bench/src/irq.c                | 21 ++++--
 apps/sel4bench/src/scheduler.c          | 10 +++
 apps/sel4bench/src/sync.c               | 14 ++++
 apps/signal/src/main.c                  | 20 +-----
 apps/sync/src/main.c                    | 83 +++++++++++++++++++++
 libsel4benchsupport/include/benchmark.h | 14 +++-
 libsel4benchsupport/include/fault.h     | 19 +++++
 libsel4benchsupport/include/hardware.h  |  8 +++
 libsel4benchsupport/include/irq.h       | 13 ++++
 libsel4benchsupport/include/scheduler.h | 12 ++++
 libsel4benchsupport/include/sync.h      | 11 +++
 libsel4benchsupport/src/support.c       | 13 ++++
 18 files changed, 513 insertions(+), 25 deletions(-)

diff --git a/apps/fault/src/main.c b/apps/fault/src/main.c
index 69a9bb71..2c6c94c8 100644
--- a/apps/fault/src/main.c
+++ b/apps/fault/src/main.c
@@ -115,6 +115,34 @@ static void measure_fault_handler_fn(int argc, char **argv)
     fault_handler_done(ep, ip, done_ep, reply);
 }
 
+static void measure_fault_handler_fn_ep(int argc, char **argv)
+{
+    seL4_CPtr ep, done_ep, reply;
+    volatile ccnt_t *start;
+    ccnt_t end, sum = 0, sum2 = 0, overhead;
+    fault_results_t *results;
+    DATACOLLECT_INIT();
+
+    parse_handler_args(argc, argv, &ep, &start, &results, &done_ep, &reply);
+
+    overhead = results->fault_ep_min_overhead;
+
+    seL4_Word ip = fault_handler_start(ep, done_ep, reply);
+    for (seL4_Word i = 0; i < N_RUNS; i++) {
+        ip += UD_INSTRUCTION_SIZE;
+        DO_REAL_REPLY_RECV_1(ep, ip, reply);
+
+        SEL4BENCH_READ_CCNT(end);
+        DATACOLLECT_GET_SUMS(i, N_IGNORED, *start, end, overhead, sum, sum2);
+    }
+
+    results->fault_ep_sum = sum;
+    results->fault_ep_sum2 = sum2;
+    results->fault_ep_num = N_RUNS - N_IGNORED;
+
+    fault_handler_done(ep, ip, done_ep, reply);
+}
+
 /* Pair for measuring fault handler -> faultee path */
 static void measure_fault_reply_fn(int argc, char **argv)
 {
@@ -153,6 +181,35 @@ static void measure_fault_reply_handler_fn(int argc, char **argv)
     fault_handler_done(ep, ip, done_ep, reply);
 }
 
+/* measure_fault_reply_fn with early processing */
+static void measure_fault_reply_fn_ep(int argc, char **argv)
+{
+    assert(argc == N_FAULTER_ARGS);
+    volatile ccnt_t *start = (volatile ccnt_t *) atol(argv[0]);
+    fault_results_t *results = (fault_results_t *) atol(argv[1]);
+    seL4_CPtr done_ep = atol(argv[2]);
+    ccnt_t overhead, sum = 0, sum2 = 0;
+    DATACOLLECT_INIT();
+
+    overhead = results->fault_reply_ep_min_overhead;
+
+    /* handle 1 fault first to make sure start is set */
+    fault();
+    for (seL4_Word i = 0; i < N_RUNS; i++) {
+        fault();
+        ccnt_t end;
+        SEL4BENCH_READ_CCNT(end);
+        DATACOLLECT_GET_SUMS(i, N_IGNORED, *start, end, overhead, sum, sum2);
+    }
+    fault();
+
+    results->fault_reply_ep_sum = sum;
+    results->fault_reply_ep_sum2 = sum2;
+    results->fault_reply_ep_num = N_RUNS - N_IGNORED;
+
+    seL4_Send(done_ep, seL4_MessageInfo_new(0, 0, 0, 0));
+}
+
 /* round_trip fault handling pair */
 static void measure_fault_roundtrip_fn(int argc, char **argv)
 {
@@ -170,6 +227,32 @@ static void measure_fault_roundtrip_fn(int argc, char **argv)
     seL4_Send(done_ep, seL4_MessageInfo_new(0, 0, 0, 0));
 }
 
+static void measure_fault_roundtrip_fn_ep(int argc, char **argv)
+{
+    assert(argc == N_FAULTER_ARGS);
+    fault_results_t *results = (fault_results_t *) atol(argv[1]);
+    seL4_CPtr done_ep = atol(argv[2]);
+    ccnt_t sum = 0, sum2 = 0, overhead;
+    DATACOLLECT_INIT();
+
+    overhead = results->round_trip_ep_min_overhead;
+
+    for (seL4_Word i = 0; i < N_RUNS; i++) {
+        ccnt_t start, end;
+        SEL4BENCH_READ_CCNT(start);
+        fault();
+        SEL4BENCH_READ_CCNT(end);
+        DATACOLLECT_GET_SUMS(i, N_IGNORED, start, end, overhead, sum, sum2);
+    }
+    fault();
+
+    results->round_trip_ep_sum = sum;
+    results->round_trip_ep_sum2 = sum2;
+    results->round_trip_ep_num = N_RUNS - N_IGNORED;
+
+    seL4_Send(done_ep, seL4_MessageInfo_new(0, 0, 0, 0));
+}
+
 static void measure_fault_roundtrip_handler_fn(int argc, char **argv)
 {
     seL4_CPtr ep, done_ep, reply;
@@ -250,11 +333,23 @@ static void run_fault_benchmark(env_t *env, fault_results_t *results)
     /* benchmark fault */
     run_benchmark(measure_fault_fn, measure_fault_handler_fn, done_ep.cptr);
 
+    /* benchmark fault early processing */
+    results->fault_ep_min_overhead = getMinOverhead(results->reply_recv_overhead, N_RUNS);
+    run_benchmark(measure_fault_fn, measure_fault_handler_fn_ep, done_ep.cptr);
+
     /* benchmark reply */
     run_benchmark(measure_fault_reply_fn, measure_fault_reply_handler_fn, done_ep.cptr);
 
+    /* benchmark reply early processing */
+    results->fault_reply_ep_min_overhead = getMinOverhead(results->ccnt_overhead, N_RUNS);
+    run_benchmark(measure_fault_reply_fn_ep, measure_fault_reply_handler_fn, done_ep.cptr);
+
     /* benchmark round_trip */
     run_benchmark(measure_fault_roundtrip_fn, measure_fault_roundtrip_handler_fn, done_ep.cptr);
+
+    /* benchmark round_trip early processing */
+    results->round_trip_ep_min_overhead = getMinOverhead(results->reply_recv_overhead, N_RUNS);
+    run_benchmark(measure_fault_roundtrip_fn_ep, measure_fault_roundtrip_handler_fn, done_ep.cptr);
 }
 
 void measure_overhead(fault_results_t *results)
diff --git a/apps/hardware/src/main.c b/apps/hardware/src/main.c
index e8c9a117..238480b2 100644
--- a/apps/hardware/src/main.c
+++ b/apps/hardware/src/main.c
@@ -49,6 +49,25 @@ void measure_nullsyscall(ccnt_t *results)
 
 }
 
+/* Early-processing null syscall benchmark; run after measure_nullsyscall_overhead(). */
+void measure_nullsyscall_ep(hardware_results_t *results)
+{
+    ccnt_t start, end, sum = 0, sum2 = 0, overhead;
+
+    /* overhead_min is not filled in by the caller: derive it from the overhead samples */
+    overhead = results->overhead_min = getMinOverhead(results->nullSyscall_overhead, N_RUNS);
+    DATACOLLECT_INIT();
+    for (seL4_Word i = 0; i < N_RUNS; i++) {
+        SEL4BENCH_READ_CCNT(start);
+        DO_REAL_NULLSYSCALL();
+        SEL4BENCH_READ_CCNT(end);
+        DATACOLLECT_GET_SUMS(i, N_IGNORED, start, end, overhead, sum, sum2);
+    }
+    results->nullSyscall_ep_sum = sum;
+    results->nullSyscall_ep_sum2 = sum2;
+    results->nullSyscall_ep_num = N_RUNS - N_IGNORED;
+}
+
 static env_t *env;
 
 void CONSTRUCTOR(MUSLCSYS_WITH_VSYSCALL_PRIORITY) init_env(void)
@@ -75,6 +94,7 @@ int main(int argc, char **argv)
     /* measure overhead */
     measure_nullsyscall_overhead(results->nullSyscall_overhead);
     measure_nullsyscall(results->nullSyscall_results);
+    measure_nullsyscall_ep(results);
 
     /* done -> results are stored in shared memory so we can now return */
     benchmark_finished(EXIT_SUCCESS);
diff --git a/apps/irquser/src/main.c b/apps/irquser/src/main.c
index 5e4f6cb1..8419b0de 100644
--- a/apps/irquser/src/main.c
+++ b/apps/irquser/src/main.c
@@ -75,6 +75,41 @@ void ticker_fn(ccnt_t *results, volatile ccnt_t *current_time)
     seL4_Send(done_ep, seL4_MessageInfo_new(0, 0, 0, 0));
 }
 
+void ticker_fn_ep(int argc, char **argv)
+{
+    if (argc != 5) {
+        abort();
+    }
+    ccnt_t overhead = (ccnt_t) atol(argv[0]);
+    ccnt_t *results_sum = (ccnt_t *) atol(argv[1]);
+    ccnt_t *results_sum2 = (ccnt_t *) atol(argv[2]);
+    ccnt_t *results_num = (ccnt_t *) atol(argv[3]);
+    volatile ccnt_t *current_time = (volatile ccnt_t *) atol(argv[4]);
+
+    seL4_Word start, end_low;
+    ccnt_t end, sum = 0, sum2 = 0;
+    seL4_Word badge;
+
+    DATACOLLECT_INIT();
+
+    for (seL4_Word i = 0; i < N_RUNS; i++) {
+        /* wait for irq */
+        seL4_Wait(timer_signal, &badge);
+        /* record result */
+        SEL4BENCH_READ_CCNT(end);
+        sel4platsupport_irq_handle(irq_ops, timer_ntfn_id, badge);
+        end_low = (seL4_Word) end;
+        start = (seL4_Word) * current_time;
+        DATACOLLECT_GET_SUMS(i, N_IGNORED, start, end_low, overhead, sum, sum2);
+    }
+
+    *results_sum = sum;
+    *results_sum2 = sum2;
+    *results_num = N_RUNS - N_IGNORED;
+
+    seL4_Send(done_ep, seL4_MessageInfo_new(0, 0, 0, 0));
+}
+
 static env_t *env;
 
 void CONSTRUCTOR(MUSLCSYS_WITH_VSYSCALL_PRIORITY) init_env(void)
@@ -132,6 +167,9 @@ int main(int argc, char **argv)
         results->overheads[i] = end - start;
     }
 
+    /* find the minimum overhead for early processing run */
+    results->overhead_min = getMinOverhead(results->overheads, N_RUNS);
+
     /* create a frame for the shared time variable so we can share it between processes */
     ccnt_t *local_current_time = (ccnt_t *) vspace_new_pages(&env->vspace, seL4_AllRights, 1, seL4_PageBits);
     if (local_current_time == NULL) {
@@ -165,6 +203,31 @@ int main(int argc, char **argv)
     error = seL4_TCB_Suspend(ticker.tcb.cptr);
     assert(error == seL4_NoError);
 
+    /* run the benchmark again with early processing */
+    char ticker_ep_strings[5][WORD_STRING_SIZE];
+    char *ticker_ep_argv[5];
+    sel4utils_create_word_args(ticker_ep_strings, ticker_ep_argv, 5, (seL4_Word) results->overhead_min,
+                               &results->thread_results_ep_sum,
+                               &results->thread_results_ep_sum2, &results->thread_results_ep_num, (seL4_Word) local_current_time);
+    error = sel4utils_start_thread(&ticker, (sel4utils_thread_entry_fn) ticker_fn_ep, (void *) 5, (void *) ticker_ep_argv,
+                                   true);
+    if (error) {
+        ZF_LOGF("Failed to start ticker");
+    }
+
+    error = sel4utils_start_thread(&spinner, (sel4utils_thread_entry_fn) spinner_fn, (void *) 1, (void *) spinner_argv,
+                                   true);
+    assert(!error);
+
+    benchmark_wait_children(endpoint.cptr, "child of irq-user", 1);
+
+    /* stop spinner thread */
+    error = seL4_TCB_Suspend(spinner.tcb.cptr);
+    assert(error == seL4_NoError);
+
+    error = seL4_TCB_Suspend(ticker.tcb.cptr);
+    assert(error == seL4_NoError);
+
     /* now run the benchmark again, but run the spinner in another address space */
 
     /* restart ticker */
@@ -190,6 +253,30 @@ int main(int argc, char **argv)
 
     benchmark_wait_children(endpoint.cptr, "child of irq-user", 1);
 
+    /* stop threads */
+    error = seL4_TCB_Suspend(spinner_process.thread.tcb.cptr);
+    assert(error == seL4_NoError);
+
+    error = seL4_TCB_Suspend(ticker.tcb.cptr);
+    assert(error == seL4_NoError);
+
+    /* run the benchmark again but with early processing */
+    sel4utils_create_word_args(ticker_ep_strings, ticker_ep_argv, 5, (seL4_Word) results->overhead_min,
+                               &results->process_results_ep_sum,
+                               &results->process_results_ep_sum2, &results->process_results_ep_num, (seL4_Word) local_current_time);
+    error = sel4utils_start_thread(&ticker, (sel4utils_thread_entry_fn) ticker_fn_ep, (void *) 5, (void *) ticker_ep_argv,
+                                   true);
+    assert(!error);
+
+    /* start the spinner process */
+    sel4utils_create_word_args(strings, spinner_argv, 1, (seL4_Word) current_time_remote);
+    error = benchmark_spawn_process(&spinner_process, &env->slab_vka, &env->vspace, 1, spinner_argv, 1);
+    if (error) {
+        ZF_LOGF("Failed to start spinner process");
+    }
+
+    benchmark_wait_children(endpoint.cptr, "child of irq-user", 1);
+
     /* done -> results are stored in shared memory so we can now return */
     benchmark_finished(EXIT_SUCCESS);
     return 0;
diff --git a/apps/scheduler/src/main.c b/apps/scheduler/src/main.c
index 8ccdd9f3..97985e44 100644
--- a/apps/scheduler/src/main.c
+++ b/apps/scheduler/src/main.c
@@ -102,6 +102,25 @@ static void benchmark_yield(seL4_CPtr ep, ccnt_t *results, volatile ccnt_t *end)
     benchmark_wait_children(ep, "yielder", 1);
 }
 
+static void benchmark_yield_ep(seL4_CPtr ep, ccnt_t overhead, ccnt_t *result_sum, ccnt_t *result_sum2,
+                               ccnt_t *result_num, volatile ccnt_t *end)
+{
+    ccnt_t start, sum = 0, sum2 = 0;
+    DATACOLLECT_INIT();
+    /* run the benchmark */
+    for (seL4_Word i = 0; i < N_RUNS; i++) {
+        SEL4BENCH_READ_CCNT(start);
+        seL4_Yield();
+        DATACOLLECT_GET_SUMS(i, N_IGNORED, start, *end, overhead, sum, sum2);
+    }
+
+    *result_sum = sum;
+    *result_sum2 = sum2;
+    *result_num = N_RUNS - N_IGNORED;
+
+    benchmark_wait_children(ep, "yielder", 1);
+}
+
 static void benchmark_yield_thread(env_t *env, seL4_CPtr ep, ccnt_t *results)
 {
     sel4utils_thread_t thread;
@@ -117,6 +136,22 @@ static void benchmark_yield_thread(env_t *env, seL4_CPtr ep, ccnt_t *results)
     seL4_TCB_Suspend(thread.tcb.cptr);
 }
 
+static void benchmark_yield_thread_ep(env_t *env, seL4_CPtr ep, scheduler_results_t *results)
+{
+    sel4utils_thread_t thread;
+    volatile ccnt_t end;
+    char args_strings[N_YIELD_ARGS][WORD_STRING_SIZE];
+    char *argv[N_YIELD_ARGS];
+
+    benchmark_configure_thread(env, ep, seL4_MaxPrio, "yielder", &thread);
+    sel4utils_create_word_args(args_strings, argv, N_YIELD_ARGS, ep, (seL4_Word) &end);
+    sel4utils_start_thread(&thread, (sel4utils_thread_entry_fn) yield_fn, (void *) N_YIELD_ARGS, (void *) argv, 1);
+
+    benchmark_yield_ep(ep, results->overhead_ccnt_min, &results->thread_yield_ep_sum, &results->thread_yield_ep_sum2,
+                       &results->thread_yield_ep_num, &end);
+    seL4_TCB_Suspend(thread.tcb.cptr);
+}
+
 static void benchmark_yield_process(env_t *env, seL4_CPtr ep, ccnt_t *results)
 {
     sel4utils_process_t process;
@@ -153,6 +188,43 @@ static void benchmark_yield_process(env_t *env, seL4_CPtr ep, ccnt_t *results)
     seL4_TCB_Suspend(process.thread.tcb.cptr);
 }
 
+static void benchmark_yield_process_ep(env_t *env, seL4_CPtr ep, scheduler_results_t *results)
+{
+    sel4utils_process_t process;
+    void *start;
+    void *remote_start;
+    seL4_CPtr remote_ep;
+    char args_strings[N_YIELD_ARGS][WORD_STRING_SIZE];
+    char *argv[N_YIELD_ARGS];
+    UNUSED int error;
+    cspacepath_t path;
+
+    /* allocate a page to share with the yield process; it is read as the end cycle count */
+    start = vspace_new_pages(&env->vspace, seL4_AllRights, 1, seL4_PageBits);
+    assert(start != NULL);
+
+    benchmark_shallow_clone_process(env, &process, seL4_MaxPrio, yield_fn, "yield process");
+
+    /* share memory for shared variable */
+    remote_start = vspace_share_mem(&env->vspace, &process.vspace, start, 1, seL4_PageBits,
+                                    seL4_AllRights, 1);
+    assert(remote_start != NULL);
+
+    /* copy ep cap */
+    vka_cspace_make_path(&env->slab_vka, ep, &path);
+    remote_ep = sel4utils_copy_path_to_process(&process, path);
+    assert(remote_ep != seL4_CapNull);
+
+    sel4utils_create_word_args(args_strings, argv, N_YIELD_ARGS, remote_ep, (seL4_Word) remote_start);
+
+    error = benchmark_spawn_process(&process, &env->slab_vka, &env->vspace, N_YIELD_ARGS, argv, 1);
+    assert(error == seL4_NoError);
+
+    benchmark_yield_ep(ep, results->overhead_ccnt_min, &results->process_yield_ep_sum, &results->process_yield_ep_sum2,
+                       &results->process_yield_ep_num, (volatile ccnt_t *) start);
+    seL4_TCB_Suspend(process.thread.tcb.cptr);
+}
+
 static void benchmark_prio_threads(env_t *env, seL4_CPtr ep, seL4_CPtr produce, seL4_CPtr consume,
                                    ccnt_t results[N_PRIOS][N_RUNS])
 {
@@ -367,6 +439,9 @@ int main(int argc, char **argv)
     measure_signal_overhead(produce.cptr, results->overhead_signal);
     measure_yield_overhead(results->overhead_ccnt);
 
+    /* extract the minimum overhead for early processing benchmarks */
+    results->overhead_ccnt_min = getMinOverhead(results->overhead_ccnt, N_RUNS);
+
     benchmark_prio_threads(env, done_ep.cptr, produce.cptr, consume.cptr,
                            results->thread_results);
     benchmark_prio_processes(env, done_ep.cptr, produce.cptr, consume.cptr,
@@ -375,7 +450,9 @@ int main(int argc, char **argv)
 
     /* thread yield benchmarks */
     benchmark_yield_thread(env, done_ep.cptr, results->thread_yield);
+    benchmark_yield_thread_ep(env, done_ep.cptr, results);
     benchmark_yield_process(env, done_ep.cptr, results->process_yield);
+    benchmark_yield_process_ep(env, done_ep.cptr, results);
     benchmark_yield_average(results->average_yield);
 
     /* done -> results are stored in shared memory so we can now return */
diff --git a/apps/sel4bench/src/fault.c b/apps/sel4bench/src/fault.c
index d9a1cb63..e99633d3 100644
--- a/apps/sel4bench/src/fault.c
+++ b/apps/sel4bench/src/fault.c
@@ -40,10 +40,20 @@ static json_t *fault_process(void *results)
     result = process_result(N_RUNS, raw_results->round_trip, desc);
     json_array_append_new(array, result_set_to_json(set));
 
+    set.name = "fault round trip (early processing)";
+    result = process_result_early_proc(raw_results->round_trip_ep_num, raw_results->round_trip_ep_sum,
+                                       raw_results->round_trip_ep_sum2, raw_results->round_trip_ep);
+    json_array_append_new(array, result_set_to_json(set));
+
     set.name = "faulter -> fault handler";
     result = process_result(N_RUNS, raw_results->fault, desc);
     json_array_append_new(array, result_set_to_json(set));
 
+    set.name = "faulter -> fault handler (early processing)";
+    result = process_result_early_proc(raw_results->fault_ep_num, raw_results->fault_ep_sum,
+                                       raw_results->fault_ep_sum2, raw_results->fault_ep);
+    json_array_append_new(array, result_set_to_json(set));
+
     /* calculate the overhead of reading the cycle count (fault handler -> faulter path
      * does not include a call to seL4_ReplyRecv_ */
 
@@ -60,6 +70,12 @@ static json_t *fault_process(void *results)
     result = process_result(N_RUNS, raw_results->fault_reply, desc);
     json_array_append_new(array, result_set_to_json(set));
 
+    /* same fault handler -> faulter path, computed from the early-processing sums */
+    set.name = "fault handler -> faulter (early processing)";
+    result = process_result_early_proc(raw_results->fault_reply_ep_num, raw_results->fault_reply_ep_sum,
+                                       raw_results->fault_reply_ep_sum2, raw_results->fault_reply_ep);
+    json_array_append_new(array, result_set_to_json(set));
+
     return array;
 }
 
diff --git a/apps/sel4bench/src/hardware.c b/apps/sel4bench/src/hardware.c
index 0d82e591..1ae2aa58 100644
--- a/apps/sel4bench/src/hardware.c
+++ b/apps/sel4bench/src/hardware.c
@@ -37,6 +37,11 @@ static json_t *hardware_process(void *results)
     json_t *array = json_array();
     json_array_append_new(array, result_set_to_json(set));
 
+    set.name = "Hardware null_syscall thread (early processing)";
+    result = process_result_early_proc(raw_results->nullSyscall_ep_num, raw_results->nullSyscall_ep_sum,
+                                       raw_results->nullSyscall_ep_sum2, raw_results->nullSyscall_ep);
+    json_array_append_new(array, result_set_to_json(set));
+
     set.name = "Nop syscall overhead";
     set.results = &nopnulsyscall_result;
     json_array_append_new(array, result_set_to_json(set));
diff --git a/apps/sel4bench/src/irq.c b/apps/sel4bench/src/irq.c
index 19c850ac..1a622864 100644
--- a/apps/sel4bench/src/irq.c
+++ b/apps/sel4bench/src/irq.c
@@ -120,16 +120,27 @@ static json_t *irquser_process(void *r)
         .name = "IRQ user measurement overhead"
     };
 
-    result_t results[3];
+    result_t results[5];
 
     results[0] = process_result(N_RUNS, raw_results->overheads, desc);
 
     desc.overhead = results[0].min;
 
     results[1] = process_result(N_RUNS, raw_results->thread_results, desc);
-    results[2] = process_result(N_RUNS, raw_results->process_results, desc);
-
-    char *types[] = {"Measurement overhead", "Without context switch", "With context switch"};
+    results[2] = process_result_early_proc(raw_results->thread_results_ep_num,
+                                           raw_results->thread_results_ep_sum,
+                                           raw_results->thread_results_ep_sum2,
+                                           raw_results->thread_results_ep);
+    results[3] = process_result(N_RUNS, raw_results->process_results, desc);
+    results[4] = process_result_early_proc(raw_results->process_results_ep_num,
+                                           raw_results->process_results_ep_sum,
+                                           raw_results->process_results_ep_sum2,
+                                           raw_results->process_results_ep);
+
+    char *types[] = {"Measurement overhead", "Without context switch",
+                     "Without context switch (early processing)", "With context switch",
+                     "With context switch (early processing)"
+                    };
 
     column_t col = {
         .header = "Type",
@@ -139,7 +150,7 @@ static json_t *irquser_process(void *r)
 
     result_set_t set = {
         .name = "IRQ path cycle count (measured from user level)",
-        .n_results = 3,
+        .n_results = 5,
         .results = results,
         .n_extra_cols = 1,
         .extra_cols = &col
diff --git a/apps/sel4bench/src/scheduler.c b/apps/sel4bench/src/scheduler.c
index 305efa48..41718797 100644
--- a/apps/sel4bench/src/scheduler.c
+++ b/apps/sel4bench/src/scheduler.c
@@ -28,10 +28,20 @@ static void process_yield_results(scheduler_results_t *results, ccnt_t overhead,
     result = process_result(N_RUNS, results->thread_yield, desc);
     json_array_append_new(array, result_set_to_json(set));
 
+    set.name = "Thread yield (early processing)";
+    result = process_result_early_proc(results->thread_yield_ep_num, results->thread_yield_ep_sum,
+                                       results->thread_yield_ep_sum2, results->thread_yield_ep);
+    json_array_append_new(array, result_set_to_json(set));
+
     set.name = "Process yield";
     result = process_result(N_RUNS, results->process_yield, desc);
     json_array_append_new(array, result_set_to_json(set));
 
+    set.name = "Process yield (early processing)";
+    result = process_result_early_proc(results->process_yield_ep_num, results->process_yield_ep_sum,
+                                       results->process_yield_ep_sum2, results->process_yield_ep);
+    json_array_append_new(array, result_set_to_json(set));
+
     result_t average_results[NUM_AVERAGE_EVENTS];
     process_average_results(N_RUNS, NUM_AVERAGE_EVENTS, results->average_yield, average_results);
     json_array_append_new(array, average_counters_to_json("Average seL4_Yield (no thread switch)",
diff --git a/apps/sel4bench/src/sync.c b/apps/sel4bench/src/sync.c
index a76c6726..71775db5 100644
--- a/apps/sel4bench/src/sync.c
+++ b/apps/sel4bench/src/sync.c
@@ -74,6 +74,20 @@ static json_t *sync_process(void *results)
         json_array_append_new(array, result_set_to_json(set));
     }
 
+    result = process_result_early_proc(raw_results->producer_to_consumer_ep_num,
+                                       raw_results->producer_to_consumer_ep_sum,
+                                       raw_results->producer_to_consumer_ep_sum2,
+                                       raw_results->producer_to_consumer_ep);
+    set.name = "Producer to consumer (early processing)";
+    json_array_append_new(array, result_set_to_json(set));
+
+    result = process_result_early_proc(raw_results->consumer_to_producer_ep_num,
+                                       raw_results->consumer_to_producer_ep_sum,
+                                       raw_results->consumer_to_producer_ep_sum2,
+                                       raw_results->consumer_to_producer_ep);
+    set.name = "Consumer to producer (early processing)";
+    json_array_append_new(array, result_set_to_json(set));
+
     return array;
 }
 
diff --git a/apps/signal/src/main.c b/apps/signal/src/main.c
index 6c92023b..c2cc72bb 100644
--- a/apps/signal/src/main.c
+++ b/apps/signal/src/main.c
@@ -278,24 +278,6 @@ void measure_signal_overhead(seL4_CPtr ntfn, ccnt_t *results)
     }
 }
 
-/*
- * Execution flow for Early Processing: we have to calculate Min value
- * of measured overhead before running Signal benchmark.
- *
- * In "Late Processing" flow all the data are processed
- * after all the benchmarks has finished.
- */
-ccnt_t getMinOverhead(ccnt_t overhead[N_RUNS])
-{
-    ccnt_t min = -1;
-
-    for (int i = 0; i < N_RUNS; i++) {
-        min = (overhead[i] < min) ? overhead[i] : min;
-    }
-
-    return min;
-}
-
 static env_t *env;
 
 void CONSTRUCTOR(MUSLCSYS_WITH_VSYSCALL_PRIORITY) init_env(void)
@@ -346,7 +328,7 @@ int main(int argc, char **argv)
      * result_desc_t and CONFIG_ALLOW_UNSTABLE_OVERHEAD to deal with overhead.
      * NB! CONFIG_ALLOW_UNSTABLE_OVERHEAD is not avail. in signal app.
     */
-    results->overhead_min = getMinOverhead(results->overhead);
+    results->overhead_min = getMinOverhead(results->overhead, N_RUNS);
 
     benchmark(env, done_ep.cptr, ntfn.cptr, results);
 
diff --git a/apps/sync/src/main.c b/apps/sync/src/main.c
index cd2ac50e..fa177e83 100644
--- a/apps/sync/src/main.c
+++ b/apps/sync/src/main.c
@@ -222,6 +222,86 @@ void benchmark_producer_consumer(env_t *env, seL4_CPtr ep, seL4_CPtr block_ep, s
     seL4_TCB_Suspend(consumer.tcb.cptr);
 }
 
+#define SYNC_PRODUCER_CONSUMER_FUNC_EP(name, condition, wait_func, work, direction) \
+    void \
+    name (int argc, char **argv) \
+    { \
+        seL4_CPtr done_ep = (seL4_CPtr) atol(argv[0]); \
+        seL4_CPtr block_ep = (seL4_CPtr) atol(argv[1]); \
+        sync_bin_sem_t *lock = (sync_bin_sem_t *) atol(argv[2]); \
+        sync_cv_t *wait_cv = (sync_cv_t *) atol(argv[3]); \
+        sync_cv_t *signal_cv = (sync_cv_t *) atol(argv[4]); \
+        int *fifo_head = (int *) atol(argv[5]); \
+        ccnt_t *start = (ccnt_t *) atol(argv[6]); \
+        ccnt_t *signal = (ccnt_t *) atol(argv[7]); \
+        sync_results_t *results = (sync_results_t *) atol(argv[8]); \
+        ccnt_t end, sum = 0, sum2 = 0; \
+        DATACOLLECT_INIT(); \
+        for (seL4_Word i = 0; i < N_RUNS; i++) { \
+            sync_bin_sem_wait(lock); \
+            while (condition) { \
+                wait_func(lock, wait_cv); \
+            } \
+            SEL4BENCH_READ_CCNT(end); \
+            DATACOLLECT_GET_SUMS(i, N_IGNORED, *start, end, 0, sum, sum2); \
+            work; \
+            SEL4BENCH_READ_CCNT(*signal); \
+            sync_cv_signal(signal_cv); \
+            sync_bin_sem_post(lock); \
+        } \
+        \
+        results->direction##_ep_sum = sum; \
+        results->direction##_ep_sum2 = sum2; \
+        results->direction##_ep_num = N_RUNS - N_IGNORED; \
+        \
+        seL4_Send(done_ep, seL4_MessageInfo_new(0, 0, 0, 0)); \
+        seL4_Wait(block_ep, NULL); \
+    }
+
+SYNC_PRODUCER_CONSUMER_FUNC_EP(consumer_func_ep, (*fifo_head == 0), sync_cv_wait, (*fifo_head)--, consumer_to_producer)
+
+SYNC_PRODUCER_CONSUMER_FUNC_EP(producer_func_ep, (*fifo_head == FIFO_SIZE), sync_cv_wait, (*fifo_head)++,
+                               producer_to_consumer)
+
+void benchmark_producer_consumer_ep(env_t *env, seL4_CPtr ep, seL4_CPtr block_ep, sync_bin_sem_t *lock,
+                                    sync_cv_t *producer_cv, sync_cv_t *consumer_cv, sync_results_t *results)
+{
+    sel4utils_thread_t producer, consumer;
+    char producer_args_strings[N_PRODUCER_CONSUMER_ARGS][WORD_STRING_SIZE];
+    char *producer_argv[N_PRODUCER_CONSUMER_ARGS];
+    char consumer_args_strings[N_PRODUCER_CONSUMER_ARGS][WORD_STRING_SIZE];
+    char *consumer_argv[N_PRODUCER_CONSUMER_ARGS];
+    int UNUSED error;
+
+    /* Create producer consumer threads */
+    benchmark_configure_thread(env, 0, seL4_MaxPrio, "producer", &producer);
+    benchmark_configure_thread(env, 0, seL4_MaxPrio, "consumer", &consumer);
+
+    int fifo_head = 0;
+    ccnt_t producer_signal, consumer_signal;
+
+    sel4utils_create_word_args(producer_args_strings, producer_argv, N_PRODUCER_CONSUMER_ARGS,
+                               ep, block_ep, lock, consumer_cv, producer_cv, &fifo_head,
+                               &producer_signal, &consumer_signal, results);
+
+    sel4utils_create_word_args(consumer_args_strings, consumer_argv, N_PRODUCER_CONSUMER_ARGS,
+                               ep, block_ep, lock, producer_cv, consumer_cv, &fifo_head,
+                               &consumer_signal, &producer_signal, results);
+
+    error = sel4utils_start_thread(&producer, (sel4utils_thread_entry_fn) producer_func_ep,
+                                   (void *) N_PRODUCER_CONSUMER_ARGS, (void *) producer_argv, 1);
+    assert(error == seL4_NoError);
+
+    error = sel4utils_start_thread(&consumer, (sel4utils_thread_entry_fn) consumer_func_ep,
+                                   (void *) N_PRODUCER_CONSUMER_ARGS, (void *) consumer_argv, 1);
+    assert(error == seL4_NoError);
+
+    benchmark_wait_children(ep, "Broadcast bench waiters", 2);
+
+    seL4_TCB_Suspend(producer.tcb.cptr);
+    seL4_TCB_Suspend(consumer.tcb.cptr);
+}
+
 static env_t *env;
 
 void CONSTRUCTOR(MUSLCSYS_WITH_VSYSCALL_PRIORITY) init_env(void)
@@ -287,6 +367,9 @@ int main(int argc, char **argv)
     benchmark_producer_consumer(env, done_ep.cptr, block_ep.cptr, &lock,
                                 &producer_cv, &consumer_cv, results);
 
+    benchmark_producer_consumer_ep(env, done_ep.cptr, block_ep.cptr, &lock,
+                                   &producer_cv, &consumer_cv, results);
+
     sync_cv_destroy(&env->slab_vka, &producer_cv);
     sync_cv_destroy(&env->slab_vka, &consumer_cv);
     sync_bin_sem_destroy(&env->slab_vka, &lock);
diff --git a/libsel4benchsupport/include/benchmark.h b/libsel4benchsupport/include/benchmark.h
index 7441b14e..17667cd4 100644
--- a/libsel4benchsupport/include/benchmark.h
+++ b/libsel4benchsupport/include/benchmark.h
@@ -47,6 +47,18 @@ sample = is_counted * (e - s - o);\
 par_sum += sample; par_sum2 += sample * sample;\
 }
 
+/*
+ * Execution flow for Early Processing: we have to calculate min value
+ * of measured overheads before running benchmark.
+ *
+ * In "Late Processing" flow all the data is processed
+ * after all the benchmarks have finished.
+ *
+ * @param overhead array of overhead measurements
+ * @param overhead_size number of overhead measurements taken
+ */
+ccnt_t getMinOverhead(ccnt_t *overhead, seL4_Word overhead_size);
+
 /* benchmarking environment set up by root task */
 typedef struct env {
     /* vka interface for allocating *fast* objects in the benchmark */
@@ -178,4 +190,4 @@ void send_result(seL4_CPtr ep, ccnt_t result);
  *
  * @param ep The endpoint the result will be received from
  */
-ccnt_t get_result(seL4_CPtr ep);
+ccnt_t get_result(seL4_CPtr ep);
\ No newline at end of file
diff --git a/libsel4benchsupport/include/fault.h b/libsel4benchsupport/include/fault.h
index 565ef5c3..b659e637 100644
--- a/libsel4benchsupport/include/fault.h
+++ b/libsel4benchsupport/include/fault.h
@@ -21,4 +21,23 @@ typedef struct {
     ccnt_t round_trip[N_RUNS + 1];
     ccnt_t fault[N_RUNS + 1];
     ccnt_t fault_reply[N_RUNS + 1];
+
+    /* Data for early processing */
+    ccnt_t round_trip_ep_sum;
+    ccnt_t round_trip_ep_sum2;
+    ccnt_t round_trip_ep_num;
+    ccnt_t round_trip_ep_min_overhead;
+    ccnt_t round_trip_ep[N_RUNS];
+
+    ccnt_t fault_ep_sum;
+    ccnt_t fault_ep_sum2;
+    ccnt_t fault_ep_num;
+    ccnt_t fault_ep_min_overhead;
+    ccnt_t fault_ep[N_RUNS];
+
+    ccnt_t fault_reply_ep_sum;
+    ccnt_t fault_reply_ep_sum2;
+    ccnt_t fault_reply_ep_num;
+    ccnt_t fault_reply_ep_min_overhead;
+    ccnt_t fault_reply_ep[N_RUNS];
 } fault_results_t;
diff --git a/libsel4benchsupport/include/hardware.h b/libsel4benchsupport/include/hardware.h
index 7d12ad8f..c573144c 100644
--- a/libsel4benchsupport/include/hardware.h
+++ b/libsel4benchsupport/include/hardware.h
@@ -13,4 +13,12 @@
 typedef struct hardware_results {
     ccnt_t nullSyscall_results[N_RUNS];
     ccnt_t nullSyscall_overhead[N_RUNS];
+
+    /* Data for early processing */
+    ccnt_t overhead_min; /* NOTE(review): presumably the minimum of nullSyscall_overhead[] -- confirm */
+
+    ccnt_t nullSyscall_ep_sum; /* NOTE(review): presumably the sum of the samples -- confirm */
+    ccnt_t nullSyscall_ep_sum2; /* NOTE(review): presumably the sum of the squared samples -- confirm */
+    ccnt_t nullSyscall_ep_num; /* NOTE(review): presumably the number of samples -- confirm */
+    ccnt_t nullSyscall_ep[N_RUNS];
 } hardware_results_t;
diff --git a/libsel4benchsupport/include/irq.h b/libsel4benchsupport/include/irq.h
index 850213ae..d82d5030 100644
--- a/libsel4benchsupport/include/irq.h
+++ b/libsel4benchsupport/include/irq.h
@@ -23,4 +23,17 @@ typedef struct irquser_results_t {
     ccnt_t overheads[N_RUNS];
     ccnt_t thread_results[N_RUNS];
     ccnt_t process_results[N_RUNS];
+
+    /* Data for early processing */
+    ccnt_t overhead_min;
+
+    ccnt_t thread_results_ep_sum;
+    ccnt_t thread_results_ep_sum2;
+    ccnt_t thread_results_ep_num;
+    ccnt_t thread_results_ep[N_RUNS];
+
+    ccnt_t process_results_ep_sum;
+    ccnt_t process_results_ep_sum2;
+    ccnt_t process_results_ep_num;
+    ccnt_t process_results_ep[N_RUNS];
 } irquser_results_t;
diff --git a/libsel4benchsupport/include/scheduler.h b/libsel4benchsupport/include/scheduler.h
index 4d88a1a0..ba899456 100644
--- a/libsel4benchsupport/include/scheduler.h
+++ b/libsel4benchsupport/include/scheduler.h
@@ -23,6 +23,18 @@ typedef struct scheduler_results_t {
     ccnt_t overhead_ccnt[N_RUNS];
     ccnt_t average_yield[N_RUNS][NUM_AVERAGE_EVENTS];
 
+    /* Data for early processing */
+    ccnt_t overhead_ccnt_min;
+
+    ccnt_t thread_yield_ep_sum;
+    ccnt_t thread_yield_ep_sum2;
+    ccnt_t thread_yield_ep_num;
+    ccnt_t thread_yield_ep[N_RUNS];
+
+    ccnt_t process_yield_ep_sum;
+    ccnt_t process_yield_ep_sum2;
+    ccnt_t process_yield_ep_num;
+    ccnt_t process_yield_ep[N_RUNS];
 } scheduler_results_t;
 
 static inline uint8_t gen_next_prio(int i)
diff --git a/libsel4benchsupport/include/sync.h b/libsel4benchsupport/include/sync.h
index c0edd9cd..b95a370e 100644
--- a/libsel4benchsupport/include/sync.h
+++ b/libsel4benchsupport/include/sync.h
@@ -44,6 +44,17 @@ typedef struct sync_results {
 
     ccnt_t producer_to_consumer[N_PROD_CONS_BENCHMARKS][N_RUNS];
     ccnt_t consumer_to_producer[N_PROD_CONS_BENCHMARKS][N_RUNS];
+
+    /* Data for early processing */
+    ccnt_t producer_to_consumer_ep_sum;
+    ccnt_t producer_to_consumer_ep_sum2;
+    ccnt_t producer_to_consumer_ep_num;
+    ccnt_t producer_to_consumer_ep[N_RUNS];
+
+    ccnt_t consumer_to_producer_ep_sum;
+    ccnt_t consumer_to_producer_ep_sum2;
+    ccnt_t consumer_to_producer_ep_num;
+    ccnt_t consumer_to_producer_ep[N_RUNS];
 } sync_results_t;
 
 typedef void (*helper_func_t)(int argc, char *argv[]);
diff --git a/libsel4benchsupport/src/support.c b/libsel4benchsupport/src/support.c
index 314c9fe4..b34cbe4b 100644
--- a/libsel4benchsupport/src/support.c
+++ b/libsel4benchsupport/src/support.c
@@ -9,6 +9,7 @@
 
 #include <autoconf.h>
 
+#include <sel4bench/sel4bench.h>
 #include <sel4platsupport/timer.h>
 #include <sel4platsupport/io.h>
 #include <sel4rpc/client.h>
@@ -50,6 +51,18 @@ static char ALIGN(0x1000) app_morecore_area[MORE_CORE_SIZE];
 static char allocator_mem_pool[ALLOCATOR_STATIC_POOL_SIZE];
 #define ALLOCMAN_VIRTUAL_SIZE BIT(20)
 
+/* Early processing helper: returns the smallest of the overhead_size values in overhead[]. */
+ccnt_t getMinOverhead(ccnt_t *overhead, seL4_Word overhead_size)
+{
+    ccnt_t min = (ccnt_t) -1; /* largest ccnt_t (assuming unsigned); returned if the array is empty */
+
+    for (seL4_Word i = 0; i < overhead_size; i++) { /* index type matches the bound's type */
+        min = (overhead[i] < min) ? overhead[i] : min;
+    }
+
+    return min;
+}
+
 /* serial server */
 static serial_client_context_t context;