From fb2ac114265988fc937e88c6162d65df5b10ffc6 Mon Sep 17 00:00:00 2001 From: Diogo Netto Date: Thu, 6 Oct 2022 16:41:39 -0400 Subject: [PATCH 01/10] started perf stuff --- measure_gc_cycles.c | 41 +++++++++++++++++++++++++++++++++++++++++ utils.jl | 21 ++++++++++++++++++++- 2 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 measure_gc_cycles.c diff --git a/measure_gc_cycles.c b/measure_gc_cycles.c new file mode 100644 index 0000000..eb8b736 --- /dev/null +++ b/measure_gc_cycles.c @@ -0,0 +1,41 @@ +// Based on `https://man7.org/linux/man-pages/man2/perf_event_open.2.html` + +#include +#include +#include +#include +#include +#include +#include + +long perf_event_start(void) +{ + struct perf_event_attr pe; + memset(&pe, 0, sizeof(pe)); + pe.type = PERF_TYPE_HARDWARE; + pe.size = sizeof(pe); + pe.config = PERF_COUNT_HW_INSTRUCTIONS; + pe.disabled = 1; + pe.exclude_kernel = 1; + pe.exclude_hv = 1; + + int fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0); + if (fd == -1) { + fprintf(stderr, "Error opening leader\n", pe.config); + exit(1); + } + + ioctl(fd, PERF_EVENT_IOC_RESET, 0); + ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); + + return fd; +} + +long perf_event_end(int fd) +{ + long long count; + ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); + read(fd, &count, sizeof(count)); + close(fd); + return count; +} diff --git a/utils.jl b/utils.jl index 93a32cb..9c68dca 100644 --- a/utils.jl +++ b/utils.jl @@ -2,11 +2,28 @@ using Pkg Pkg.instantiate() # It is dumb that I have to do this using Serialization +const perf_fd = Ref(UInt64(0)) +const cycles_in_gc = Ref(UInt64(0)) + +function gc_cb_pre() + perf_fd[] = ccall((:perf_event_start, "lib_gc_benchmarks.so"), Cvoid, ()) + nothing +end + +function gc_cb_post() + cycles_in_gc[] += ccall((:perf_event_start, "lib_gc_benchmarks.so"), Clonglong, (Cint,), (perf_fd[],)) + nothing +end + macro gctime(ex) fc = isdefined(Base.Experimental, Symbol("@force_compile")) ? :(Base.Experimental.@force_compile) : :() quote + ccall(:jl_gc_set_cb_pre_gc, Cvoid, (Ptr{Cvoid}, Cint), + @cfunction(gc_cb_pre, Cvoid, (Cint,)), true) + ccall(:jl_gc_set_cb_post_gc, Cvoid, (Ptr{Cvoid}, Cint), + @cfunction(gc_cb_post, Cvoid, (Cint,)), true) $fc local result try @@ -15,11 +32,13 @@ macro gctime(ex) local val = $(esc(ex)) local end_time = time_ns() local end_gc_num = Base.gc_num() + local cycles_in_gc = cycles_in_gc[] result = ( value = val, times = (end_time - start_time), gc_diff = Base.GC_Diff(end_gc_num, start_gc_num), - gc_end = end_gc_num + gc_end = end_gc_num, + cycles_in_gc = cycles_in_gc, ) catch e @show e From 2b23e9c91b865c631bb2dc4378e1759a56d53a30 Mon Sep 17 00:00:00 2001 From: Diogo C Netto Date: Thu, 6 Oct 2022 16:56:41 -0400 Subject: [PATCH 02/10] fixed some typos --- measure_gc_cycles.c | 2 +- utils.jl | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/measure_gc_cycles.c b/measure_gc_cycles.c index eb8b736..2ee8437 100644 --- a/measure_gc_cycles.c +++ b/measure_gc_cycles.c @@ -21,7 +21,7 @@ long perf_event_start(void) int fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0); if (fd == -1) { - fprintf(stderr, "Error opening leader\n", pe.config); + fprintf(stderr, "Error opening perf event\n"); exit(1); } diff --git a/utils.jl b/utils.jl index 9c68dca..f43cf8d 100644 --- a/utils.jl +++ b/utils.jl @@ -11,7 +11,7 @@ function gc_cb_pre() end function gc_cb_post() - cycles_in_gc[] += ccall((:perf_event_start, "lib_gc_benchmarks.so"), Clonglong, (Cint,), (perf_fd[],)) + cycles_in_gc[] += ccall((:perf_event_end, "lib_gc_benchmarks.so"), Clonglong, (Cint,), (perf_fd[],)) nothing end @@ -20,10 +20,10 @@ macro gctime(ex) :(Base.Experimental.@force_compile) : :() quote - ccall(:jl_gc_set_cb_pre_gc, Cvoid, (Ptr{Cvoid}, Cint), - @cfunction(gc_cb_pre, Cvoid, (Cint,)), true) - ccall(:jl_gc_set_cb_post_gc, Cvoid, (Ptr{Cvoid}, Cint), - @cfunction(gc_cb_post, Cvoid, (Cint,)), true) + # ccall(:jl_gc_set_cb_pre_gc, Cvoid, (Ptr{Cvoid}, Cint), + # @cfunction(gc_cb_pre, Cvoid, (Cint,)), true) + # ccall(:jl_gc_set_cb_post_gc, Cvoid, (Ptr{Cvoid}, Cint), + # @cfunction(gc_cb_post, Cvoid, (Cint,)), true) $fc local result try @@ -32,13 +32,12 @@ macro gctime(ex) local val = $(esc(ex)) local end_time = time_ns() local end_gc_num = Base.gc_num() - local cycles_in_gc = cycles_in_gc[] result = ( value = val, times = (end_time - start_time), gc_diff = Base.GC_Diff(end_gc_num, start_gc_num), gc_end = end_gc_num, - cycles_in_gc = cycles_in_gc, + cycles_in_gc = cycles_in_gc[], ) catch e @show e From d3a6703f8673633bebaa746eae6af0705e8ba3c7 Mon Sep 17 00:00:00 2001 From: Diogo C Netto Date: Fri, 7 Oct 2022 12:04:18 -0400 Subject: [PATCH 03/10] actually calling perf_event funcs --- measure_gc_cycles.c | 4 ++-- utils.jl | 26 +++++++++++++++----------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/measure_gc_cycles.c b/measure_gc_cycles.c index 2ee8437..d069e0a 100644 --- a/measure_gc_cycles.c +++ b/measure_gc_cycles.c @@ -8,7 +8,7 @@ #include #include -long perf_event_start(void) +long perf_event_start() { struct perf_event_attr pe; memset(&pe, 0, sizeof(pe)); @@ -31,7 +31,7 @@ long perf_event_start(void) return fd; } -long perf_event_end(int fd) +long perf_event_end(long fd) { long long count; ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); diff --git a/utils.jl b/utils.jl index f43cf8d..9dfef75 100644 --- a/utils.jl +++ b/utils.jl @@ -2,16 +2,18 @@ using Pkg Pkg.instantiate() # It is dumb that I have to do this using Serialization -const perf_fd = Ref(UInt64(0)) -const cycles_in_gc = Ref(UInt64(0)) +const perf_fd = Ref(Int64(0)) +const cycles_in_gc = Ref(Int128(0)) -function gc_cb_pre() - perf_fd[] = ccall((:perf_event_start, "lib_gc_benchmarks.so"), Cvoid, ()) +const GC_LIB = "../../../gc_benchmarks.so" + +function gc_cb_pre(full::Cint) + perf_fc[] = ccall((:perf_event_start, GC_LIB), Int64, ()) nothing end -function gc_cb_post() - cycles_in_gc[] += ccall((:perf_event_end, "lib_gc_benchmarks.so"), Clonglong, (Cint,), (perf_fd[],)) +function gc_cb_post(full::Cint) + cycles_in_gc[] += ccall((:perf_event_end, GC_LIB), Clonglong, (Cint,), (perf_fd[],)) nothing end @@ -20,18 +22,19 @@ macro gctime(ex) :(Base.Experimental.@force_compile) : :() quote - # ccall(:jl_gc_set_cb_pre_gc, Cvoid, (Ptr{Cvoid}, Cint), - # @cfunction(gc_cb_pre, Cvoid, (Cint,)), true) - # ccall(:jl_gc_set_cb_post_gc, Cvoid, (Ptr{Cvoid}, Cint), - # @cfunction(gc_cb_post, Cvoid, (Cint,)), true) $fc local result + ccall(:jl_gc_set_cb_pre_gc, Cvoid, (Ptr{Cvoid}, Cint), + @cfunction(gc_cb_pre, Cvoid, (Cint,)), true) + ccall(:jl_gc_set_cb_post_gc, Cvoid, (Ptr{Cvoid}, Cint), + @cfunction(gc_cb_post, Cvoid, (Cint,)), true) try local start_gc_num = Base.gc_num() local start_time = time_ns() local val = $(esc(ex)) local end_time = time_ns() local end_gc_num = Base.gc_num() + @show cycles_in_gc[] result = ( value = val, times = (end_time - start_time), @@ -45,7 +48,8 @@ macro gctime(ex) value = e, times = NaN, gc_diff = Base.GC_Diff(end_gc_num, start_gc_num), - gc_end = end_gc_num + gc_end = end_gc_num, + cycles_in_gc = NaN, ) end if "SERIALIZE" in ARGS From d5c4bbea80d9729608f74e7733b6ad74d7f85fec Mon Sep 17 00:00:00 2001 From: Diogo C Netto Date: Fri, 7 Oct 2022 12:25:43 -0400 Subject: [PATCH 04/10] syscall only once --- build.sh | 1 + measure_gc_cycles.c | 49 ++++++++++++++++++++++++++------------------- utils.jl | 7 ++++--- 3 files changed, 33 insertions(+), 24 deletions(-) create mode 100644 build.sh diff --git a/build.sh b/build.sh new file mode 100644 index 0000000..522dcc5 --- /dev/null +++ b/build.sh @@ -0,0 +1 @@ +gcc -Wall -shared -fPIC -o gc_benchmarks.so measure_gc_cycles.c diff --git a/measure_gc_cycles.c b/measure_gc_cycles.c index d069e0a..12e2616 100644 --- a/measure_gc_cycles.c +++ b/measure_gc_cycles.c @@ -8,34 +8,41 @@ #include #include +void perf_event_reset(); + long perf_event_start() { - struct perf_event_attr pe; - memset(&pe, 0, sizeof(pe)); - pe.type = PERF_TYPE_HARDWARE; - pe.size = sizeof(pe); - pe.config = PERF_COUNT_HW_INSTRUCTIONS; - pe.disabled = 1; - pe.exclude_kernel = 1; - pe.exclude_hv = 1; - - int fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0); - if (fd == -1) { + struct perf_event_attr pe; + memset(&pe, 0, sizeof(pe)); + pe.type = PERF_TYPE_HARDWARE; + pe.size = sizeof(pe); + pe.config = PERF_COUNT_HW_INSTRUCTIONS; + pe.disabled = 1; + pe.exclude_kernel = 1; + pe.exclude_hv = 1; + + int fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0); + if (fd == -1) { fprintf(stderr, "Error opening perf event\n"); exit(1); - } + } + + perf_event_reset(); - ioctl(fd, PERF_EVENT_IOC_RESET, 0); - ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); + return fd; +} - return fd; +void perf_event_reset(long fd) +{ + ioctl(fd, PERF_EVENT_IOC_RESET, 0); + ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); } -long perf_event_end(long fd) + +long perf_event_count(long fd) { - long long count; - ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); - read(fd, &count, sizeof(count)); - close(fd); - return count; + long long count; + ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); + read(fd, &count, sizeof(count)); + return count; } diff --git a/utils.jl b/utils.jl index 9dfef75..681c2de 100644 --- a/utils.jl +++ b/utils.jl @@ -8,15 +8,17 @@ const cycles_in_gc = Ref(Int128(0)) const GC_LIB = "../../../gc_benchmarks.so" function gc_cb_pre(full::Cint) - perf_fc[] = ccall((:perf_event_start, GC_LIB), Int64, ()) + ccall((:perf_event_reset, GC_LIB), Cvoid, (Clong,), perf_fd[]) nothing end function gc_cb_post(full::Cint) - cycles_in_gc[] += ccall((:perf_event_end, GC_LIB), Clonglong, (Cint,), (perf_fd[],)) + cycles_in_gc[] += ccall((:perf_event_count, GC_LIB), Clonglong, (Clong,), perf_fd[]) nothing end +perf_fd[] = ccall((:perf_event_start, GC_LIB), Clong, ()) + macro gctime(ex) fc = isdefined(Base.Experimental, Symbol("@force_compile")) ? :(Base.Experimental.@force_compile) : @@ -34,7 +36,6 @@ macro gctime(ex) local val = $(esc(ex)) local end_time = time_ns() local end_gc_num = Base.gc_num() - @show cycles_in_gc[] result = ( value = val, times = (end_time - start_time), From 3852da02572427e71add1bebd093ae34e02e71ab Mon Sep 17 00:00:00 2001 From: Diogo C Netto Date: Fri, 7 Oct 2022 12:44:43 -0400 Subject: [PATCH 05/10] gc cycles in table --- measure_gc_cycles.c | 2 +- run_benchmarks.jl | 9 ++++++--- utils.jl | 6 +++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/measure_gc_cycles.c b/measure_gc_cycles.c index 12e2616..84cfa96 100644 --- a/measure_gc_cycles.c +++ b/measure_gc_cycles.c @@ -16,7 +16,7 @@ long perf_event_start() memset(&pe, 0, sizeof(pe)); pe.type = PERF_TYPE_HARDWARE; pe.size = sizeof(pe); - pe.config = PERF_COUNT_HW_INSTRUCTIONS; + pe.config = PERF_COUNT_HW_CPU_CYCLES; pe.disabled = 1; pe.exclude_kernel = 1; pe.exclude_hv = 1; diff --git a/run_benchmarks.jl b/run_benchmarks.jl index 0d0f1f9..0fb7f1f 100644 --- a/run_benchmarks.jl +++ b/run_benchmarks.jl @@ -45,6 +45,7 @@ function run_bench(runs, threads, file, show_json = false) times = [] gc_diff = [] gc_end = [] + gc_cycles = [] for _ in 1:runs # uglyness to communicate over non stdout (specifically file descriptor 3) p = Base.PipeEndpoint() @@ -57,9 +58,11 @@ function run_bench(runs, threads, file, show_json = false) push!(times, r.times) push!(gc_diff, r.gc_diff) push!(gc_end, r.gc_end) + push!(gc_cycles, r.gc_cycles) end total_stats = get_stats(times) ./ 1_000_000 gc_time = get_stats(map(stat->stat.total_time, gc_end)) ./ 1_000_000 + gc_cycles = get_stats(gc_cycles) ./ 1_000_000 mark_time = get_stats(map(stat->stat.total_mark_time, gc_end)) ./ 1_000_000 sweep_time = get_stats(map(stat->stat.total_sweep_time, gc_end)) ./ 1_000_000 max_pause = get_stats(map(stat->stat.max_pause, gc_end)) ./ 1_000_000 @@ -67,8 +70,8 @@ function run_bench(runs, threads, file, show_json = false) max_mem = get_stats(map(stat->stat.max_memory, gc_end)) ./ 1024^2 pct_gc = get_stats(map((t,stat)->(stat.total_time/t), times, gc_diff)) .* 100 - header = (["", "total time", "gc time", "mark time", "sweep time", "max GC pause", "time to safepoint", "max heap", "percent gc"], - ["", "ms", "ms", "ms", "ms", "ms", "us", "MB", "%" ]) + header = (["", "total time", "gc time", "cycles in gc", "mark time", "sweep time", "max GC pause", "time to safepoint", "max heap", "percent gc"], + ["", "ms", "ms", "1e6" , "ms" , "ms", "ms", "us", "MB", "%" ]) labels = ["minimum", "median", "maximum"] highlighters = highlight_col(4, 10, 100) # max pause append!(highlighters, highlight_col(5, 1, 10)) # time to safepoint @@ -85,7 +88,7 @@ function run_bench(runs, threads, file, show_json = false) ("pct gc", pct_gc)]) JSON.print(data) else - data = hcat(labels, total_stats, gc_time, mark_time, sweep_time, max_pause, time_to_safepoint, max_mem, pct_gc) + data = hcat(labels, total_stats, gc_time, gc_cycles, mark_time, sweep_time, max_pause, time_to_safepoint, max_mem, pct_gc) pretty_table(data; header, formatters=ft_printf("%0.0f"), highlighters) end end diff --git a/utils.jl b/utils.jl index 681c2de..5f5dc9e 100644 --- a/utils.jl +++ b/utils.jl @@ -3,7 +3,7 @@ Pkg.instantiate() # It is dumb that I have to do this using Serialization const perf_fd = Ref(Int64(0)) -const cycles_in_gc = Ref(Int128(0)) +const gc_cycles = Ref(Int128(0)) const GC_LIB = "../../../gc_benchmarks.so" @@ -13,7 +13,7 @@ function gc_cb_pre(full::Cint) end function gc_cb_post(full::Cint) - cycles_in_gc[] += ccall((:perf_event_count, GC_LIB), Clonglong, (Clong,), perf_fd[]) + gc_cycles[] += ccall((:perf_event_count, GC_LIB), Clonglong, (Clong,), perf_fd[]) nothing end @@ -41,7 +41,7 @@ macro gctime(ex) times = (end_time - start_time), gc_diff = Base.GC_Diff(end_gc_num, start_gc_num), gc_end = end_gc_num, - cycles_in_gc = cycles_in_gc[], + gc_cycles = gc_cycles[], ) catch e @show e From ac9c9bd964af175a5ee87a1396f44d2284a3cced Mon Sep 17 00:00:00 2001 From: Diogo C Netto Date: Fri, 7 Oct 2022 14:36:20 -0400 Subject: [PATCH 06/10] running 2N times to avoid syscall overhead --- measure_gc_cycles.c | 4 ++-- utils.jl | 13 +++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/measure_gc_cycles.c b/measure_gc_cycles.c index 84cfa96..2af2102 100644 --- a/measure_gc_cycles.c +++ b/measure_gc_cycles.c @@ -23,8 +23,8 @@ long perf_event_start() int fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0); if (fd == -1) { - fprintf(stderr, "Error opening perf event\n"); - exit(1); + fprintf(stderr, "Error opening perf event\n"); + exit(1); } perf_event_reset(); diff --git a/utils.jl b/utils.jl index 5f5dc9e..bb750a3 100644 --- a/utils.jl +++ b/utils.jl @@ -17,8 +17,6 @@ function gc_cb_post(full::Cint) nothing end -perf_fd[] = ccall((:perf_event_start, GC_LIB), Clong, ()) - macro gctime(ex) fc = isdefined(Base.Experimental, Symbol("@force_compile")) ? :(Base.Experimental.@force_compile) : @@ -26,16 +24,19 @@ macro gctime(ex) quote $fc local result - ccall(:jl_gc_set_cb_pre_gc, Cvoid, (Ptr{Cvoid}, Cint), - @cfunction(gc_cb_pre, Cvoid, (Cint,)), true) - ccall(:jl_gc_set_cb_post_gc, Cvoid, (Ptr{Cvoid}, Cint), - @cfunction(gc_cb_post, Cvoid, (Cint,)), true) try local start_gc_num = Base.gc_num() local start_time = time_ns() local val = $(esc(ex)) local end_time = time_ns() local end_gc_num = Base.gc_num() + # Re-run with `perf` callbacks turned on + perf_fd[] = ccall((:perf_event_start, GC_LIB), Clong, ()) + ccall(:jl_gc_set_cb_pre_gc, Cvoid, (Ptr{Cvoid}, Cint), + @cfunction(gc_cb_pre, Cvoid, (Cint,)), true) + ccall(:jl_gc_set_cb_post_gc, Cvoid, (Ptr{Cvoid}, Cint), + @cfunction(gc_cb_post, Cvoid, (Cint,)), true) + $(esc(ex)) result = ( value = val, times = (end_time - start_time), From e9ba786ef210dfe3ef33534c11e24880f0487468 Mon Sep 17 00:00:00 2001 From: Diogo C Netto Date: Fri, 21 Oct 2022 21:34:49 -0400 Subject: [PATCH 07/10] don't wrap counters in julia --- measure_gc_cycles.c | 18 ++++++++++++------ utils.jl | 23 +++++++++-------------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/measure_gc_cycles.c b/measure_gc_cycles.c index 2af2102..b2fc02e 100644 --- a/measure_gc_cycles.c +++ b/measure_gc_cycles.c @@ -8,9 +8,12 @@ #include #include +long long total_count = 0; +int fd = 0; + void perf_event_reset(); -long perf_event_start() +void perf_event_start() { struct perf_event_attr pe; memset(&pe, 0, sizeof(pe)); @@ -21,15 +24,13 @@ long perf_event_start() pe.exclude_kernel = 1; pe.exclude_hv = 1; - int fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0); + fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0); if (fd == -1) { fprintf(stderr, "Error opening perf event\n"); exit(1); } perf_event_reset(); - - return fd; } void perf_event_reset(long fd) @@ -39,10 +40,15 @@ void perf_event_reset(long fd) } -long perf_event_count(long fd) +void perf_event_count(long fd) { long long count; ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); read(fd, &count, sizeof(count)); - return count; + total_count += count; +} + +long perf_event_get_count() +{ + return total_count; } diff --git a/utils.jl b/utils.jl index bb750a3..052976d 100644 --- a/utils.jl +++ b/utils.jl @@ -1,21 +1,16 @@ using Pkg Pkg.instantiate() # It is dumb that I have to do this +using Libdl using Serialization const perf_fd = Ref(Int64(0)) const gc_cycles = Ref(Int128(0)) const GC_LIB = "../../../gc_benchmarks.so" - -function gc_cb_pre(full::Cint) - ccall((:perf_event_reset, GC_LIB), Cvoid, (Clong,), perf_fd[]) - nothing -end - -function gc_cb_post(full::Cint) - gc_cycles[] += ccall((:perf_event_count, GC_LIB), Clonglong, (Clong,), perf_fd[]) - nothing -end +lib = Libdl.dlopen(GC_LIB) +sym_reset = Libdl.dlsym(lib, :perf_event_reset) +sym_count = Libdl.dlsym(lib, :perf_event_count) +sym_get_count = Libdl.dlsym(lib, :perf_event_get_count) macro gctime(ex) fc = isdefined(Base.Experimental, Symbol("@force_compile")) ? @@ -31,18 +26,18 @@ macro gctime(ex) local end_time = time_ns() local end_gc_num = Base.gc_num() # Re-run with `perf` callbacks turned on - perf_fd[] = ccall((:perf_event_start, GC_LIB), Clong, ()) + ccall((:perf_event_start, GC_LIB), Cvoid, ()) ccall(:jl_gc_set_cb_pre_gc, Cvoid, (Ptr{Cvoid}, Cint), - @cfunction(gc_cb_pre, Cvoid, (Cint,)), true) + sym_reset, true) ccall(:jl_gc_set_cb_post_gc, Cvoid, (Ptr{Cvoid}, Cint), - @cfunction(gc_cb_post, Cvoid, (Cint,)), true) + sym_count, true) $(esc(ex)) result = ( value = val, times = (end_time - start_time), gc_diff = Base.GC_Diff(end_gc_num, start_gc_num), gc_end = end_gc_num, - gc_cycles = gc_cycles[], + gc_cycles = ccall(sym_get_count, Clong, ()), ) catch e @show e From 55d9699d452dbd495706fdc3af4a631a4ea8125c Mon Sep 17 00:00:00 2001 From: Diogo C Netto Date: Fri, 21 Oct 2022 21:42:50 -0400 Subject: [PATCH 08/10] fmt --- measure_gc_cycles.c | 2 +- utils.jl | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/measure_gc_cycles.c b/measure_gc_cycles.c index b2fc02e..f3eb025 100644 --- a/measure_gc_cycles.c +++ b/measure_gc_cycles.c @@ -50,5 +50,5 @@ void perf_event_count(long fd) long perf_event_get_count() { - return total_count; + return total_count; } diff --git a/utils.jl b/utils.jl index 052976d..4976f99 100644 --- a/utils.jl +++ b/utils.jl @@ -3,11 +3,9 @@ Pkg.instantiate() # It is dumb that I have to do this using Libdl using Serialization -const perf_fd = Ref(Int64(0)) -const gc_cycles = Ref(Int128(0)) - const GC_LIB = "../../../gc_benchmarks.so" lib = Libdl.dlopen(GC_LIB) +sym_start = Libdl.dlsym(lib, :perf_event_start) sym_reset = Libdl.dlsym(lib, :perf_event_reset) sym_count = Libdl.dlsym(lib, :perf_event_count) sym_get_count = Libdl.dlsym(lib, :perf_event_get_count) @@ -26,7 +24,7 @@ macro gctime(ex) local end_time = time_ns() local end_gc_num = Base.gc_num() # Re-run with `perf` callbacks turned on - ccall((:perf_event_start, GC_LIB), Cvoid, ()) + ccall(sym_start, Cvoid, ()) ccall(:jl_gc_set_cb_pre_gc, Cvoid, (Ptr{Cvoid}, Cint), sym_reset, true) ccall(:jl_gc_set_cb_post_gc, Cvoid, (Ptr{Cvoid}, Cint), From 493008108145e89c3d5d89c9f0e9c02dd992b6f2 Mon Sep 17 00:00:00 2001 From: Diogo C Netto Date: Mon, 24 Oct 2022 12:23:18 -0400 Subject: [PATCH 09/10] reading from actual opened fd --- measure_gc_cycles.c | 8 ++++---- utils.jl | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/measure_gc_cycles.c b/measure_gc_cycles.c index f3eb025..8c5114c 100644 --- a/measure_gc_cycles.c +++ b/measure_gc_cycles.c @@ -9,7 +9,7 @@ #include long long total_count = 0; -int fd = 0; +long fd = 0; void perf_event_reset(); @@ -33,14 +33,14 @@ void perf_event_start() perf_event_reset(); } -void perf_event_reset(long fd) +void perf_event_reset() { ioctl(fd, PERF_EVENT_IOC_RESET, 0); ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); } -void perf_event_count(long fd) +void perf_event_count() { long long count; ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); @@ -48,7 +48,7 @@ void perf_event_count(long fd) total_count += count; } -long perf_event_get_count() +long long perf_event_get_count() { return total_count; } diff --git a/utils.jl b/utils.jl index 4976f99..ab1af34 100644 --- a/utils.jl +++ b/utils.jl @@ -35,7 +35,7 @@ macro gctime(ex) times = (end_time - start_time), gc_diff = Base.GC_Diff(end_gc_num, start_gc_num), gc_end = end_gc_num, - gc_cycles = ccall(sym_get_count, Clong, ()), + gc_cycles = ccall(sym_get_count, Clonglong, ()), ) catch e @show e @@ -44,7 +44,7 @@ macro gctime(ex) times = NaN, gc_diff = Base.GC_Diff(end_gc_num, start_gc_num), gc_end = end_gc_num, - cycles_in_gc = NaN, + gc_cycles = NaN, ) end if "SERIALIZE" in ARGS From f0f05400b931d4dc134702c8ead6d2a75be9c853 Mon Sep 17 00:00:00 2001 From: Diogo C Netto Date: Tue, 25 Oct 2022 10:41:52 -0400 Subject: [PATCH 10/10] cycle count fix --- run_benchmarks.jl | 9 ++++++++- utils.jl | 26 ++++++++++++++------------ 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/run_benchmarks.jl b/run_benchmarks.jl index 0fb7f1f..a464b7f 100644 --- a/run_benchmarks.jl +++ b/run_benchmarks.jl @@ -49,7 +49,7 @@ function run_bench(runs, threads, file, show_json = false) for _ in 1:runs # uglyness to communicate over non stdout (specifically file descriptor 3) p = Base.PipeEndpoint() - cmd = `$JULIAVER --project=. --threads=$threads $file SERIALIZE` + cmd = `$JULIAVER --project=. --threads=$threads $file SERIALIZE --no-cycles-count` cmd = run(Base.CmdRedirect(cmd, p, 3), stdin, stdout, stderr, wait=false) r = deserialize(p) @assert success(cmd) @@ -58,6 +58,13 @@ function run_bench(runs, threads, file, show_json = false) push!(times, r.times) push!(gc_diff, r.gc_diff) push!(gc_end, r.gc_end) + # run once more to measure cycles + p = Base.PipeEndpoint() + cmd = `$JULIAVER --project=. --threads=$threads $file SERIALIZE --cycles-count` + cmd = run(Base.CmdRedirect(cmd, p, 3), stdin, stdout, stderr, wait=false) + r = deserialize(p) + @assert success(cmd) + # end uglyness push!(gc_cycles, r.gc_cycles) end total_stats = get_stats(times) ./ 1_000_000 diff --git a/utils.jl b/utils.jl index ab1af34..702c729 100644 --- a/utils.jl +++ b/utils.jl @@ -4,11 +4,13 @@ using Libdl using Serialization const GC_LIB = "../../../gc_benchmarks.so" -lib = Libdl.dlopen(GC_LIB) -sym_start = Libdl.dlsym(lib, :perf_event_start) -sym_reset = Libdl.dlsym(lib, :perf_event_reset) -sym_count = Libdl.dlsym(lib, :perf_event_count) -sym_get_count = Libdl.dlsym(lib, :perf_event_get_count) +const lib = Libdl.dlopen(GC_LIB) +const sym_start = Libdl.dlsym(lib, :perf_event_start) +const sym_reset = Libdl.dlsym(lib, :perf_event_reset) +const sym_count = Libdl.dlsym(lib, :perf_event_count) +const sym_get_count = Libdl.dlsym(lib, :perf_event_get_count) + +const count_cycles = ARGS[2] macro gctime(ex) fc = isdefined(Base.Experimental, Symbol("@force_compile")) ? @@ -18,18 +20,18 @@ macro gctime(ex) $fc local result try + if count_cycles == "--cycles-count" + ccall(sym_start, Cvoid, ()) + ccall(:jl_gc_set_cb_pre_gc, Cvoid, (Ptr{Cvoid}, Cint), + sym_reset, true) + ccall(:jl_gc_set_cb_post_gc, Cvoid, (Ptr{Cvoid}, Cint), + sym_count, true) + end local start_gc_num = Base.gc_num() local start_time = time_ns() local val = $(esc(ex)) local end_time = time_ns() local end_gc_num = Base.gc_num() - # Re-run with `perf` callbacks turned on - ccall(sym_start, Cvoid, ()) - ccall(:jl_gc_set_cb_pre_gc, Cvoid, (Ptr{Cvoid}, Cint), - sym_reset, true) - ccall(:jl_gc_set_cb_post_gc, Cvoid, (Ptr{Cvoid}, Cint), - sym_count, true) - $(esc(ex)) result = ( value = val, times = (end_time - start_time),