backport refinements to MemBalancer to v1.9.2+RAI for further testing
d-netto committed Dec 14, 2023
1 parent f221d80 commit 155f9b4
Showing 12 changed files with 374 additions and 165 deletions.
6 changes: 6 additions & 0 deletions Make.inc
@@ -1461,6 +1461,12 @@ endef
# Overridable in Make.user
WINE ?= wine

ifeq ($(BINARY),32)
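# Limit Julia's heap on 32-bit builds via a heap size hint (presumably to stay within the constrained 32-bit address space)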
HEAPLIM := --heap-size-hint=1000M
else
HEAPLIM :=
endif

# many of the following targets must be = not := because the expansion of the makefile functions (and $1) shouldn't happen until later
ifeq ($(BUILD_OS), WINNT) # MSYS
spawn = $(1)
1 change: 1 addition & 0 deletions NEWS.md
@@ -24,6 +24,7 @@ Language changes

Compiler/Runtime improvements
-----------------------------
* Updated GC heuristics to count allocated pages instead of individual objects ([#50144]).

* Time to first execution (TTFX, sometimes called time to first plot) is greatly reduced. Package precompilation now
saves native code into a "pkgimage", meaning that code generated during the precompilation process will not
12 changes: 9 additions & 3 deletions doc/src/devdocs/gc.md
@@ -66,6 +66,12 @@ This scheme eliminates the need of explicitly keeping a flag to indicate a full
## Heuristics

GC heuristics tune the GC by changing the size of the allocation interval between garbage collections.
If a GC was unproductive, we increase the size of the allocation interval to allow objects more time to die.
If a GC frees a lot of space, we can shrink the interval. The goal is to find a steady state where we are
allocating about as much as we are collecting.

The GC heuristics measure the heap size after a collection and set the next
collection target according to the algorithm described in https://dl.acm.org/doi/10.1145/3563323.
In summary, it argues that the heap target should have a square-root relationship with the live heap, and that it should also be scaled by how fast the GC is freeing memory and how fast the mutators are allocating.
The heuristics measure the heap size by counting the number of pages in use plus the objects allocated with malloc. Previously we measured the heap size by counting
live objects, but that does not account for fragmentation, which could lead to bad decisions. It also meant that we used thread-local information (allocations) to make
a process-wide decision (when to GC); measuring pages makes the decision global.
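As a rough illustration of the MemBalancer-style square-root rule above (a sketch only: the function name, parameter names, and the tuning constant are hypothetical, and this is not Julia's actual implementation):

```c
#include <math.h>
#include <stdint.h>

// Sketch of a MemBalancer-style heap target: headroom over the live heap grows
// with the square root of the live size, scaled by how fast the mutators
// allocate relative to how fast the GC frees memory. All names are hypothetical.
static uint64_t heap_target_sketch(uint64_t live_bytes,    // heap size measured after collection
                                   double alloc_rate,      // mutator allocation rate (bytes/sec)
                                   double gc_rate,         // GC freeing rate (bytes/sec)
                                   double tuning_constant) // trades memory footprint for GC time
{
    double headroom = tuning_constant * sqrt((double)live_bytes * alloc_rate / gc_rate);
    return live_bytes + (uint64_t)headroom;
}
```

Fast allocation or a slow collector enlarges the headroom, so the heap is allowed to grow; a collector that frees quickly shrinks it, pulling the target back toward the live size.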

The GC will do full collections when the heap size reaches 80% of the maximum allowed size.
53 changes: 45 additions & 8 deletions src/gc-debug.c
@@ -1,7 +1,10 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

#include "gc.h"
#include "julia.h"
#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

// re-include assert.h without NDEBUG,
@@ -847,11 +850,11 @@ void gc_time_pool_end(int sweep_full)
double sweep_speed = sweep_gb / sweep_pool_sec;
jl_safe_printf("GC sweep pools end %.2f ms at %.1f GB/s "
"(skipped %.2f %% of %" PRId64 ", swept %" PRId64 " pgs, "
"%" PRId64 " freed with %" PRId64 " lazily) %s\n",
"%" PRId64 " freed) %s\n",
sweep_pool_sec * 1000, sweep_speed,
(total_pages ? ((double)skipped_pages * 100) / total_pages : 0),
total_pages, total_pages - skipped_pages,
freed_pages, lazy_freed_pages,
freed_pages,
sweep_full ? "full" : "quick");
}

@@ -943,12 +946,12 @@ void gc_time_sweep_pause(uint64_t gc_end_t, int64_t actual_allocd,
jl_safe_printf("GC sweep pause %.2f ms live %" PRId64 " kB "
"(freed %" PRId64 " kB EST %" PRId64 " kB "
"[error %" PRId64 "] = %d%% of allocd b %" PRIu64 ") "
"(%.2f ms in post_mark) %s | next in %" PRId64 " kB\n",
"(%.2f ms in post_mark) %s\n",
jl_ns2ms(sweep_pause), live_bytes / 1024,
gc_num.freed / 1024, estimate_freed / 1024,
gc_num.freed - estimate_freed, pct, gc_num.since_sweep / 1024,
jl_ns2ms(gc_postmark_end - gc_premark_end),
sweep_full ? "full" : "quick", -gc_num.allocd / 1024);
sweep_full ? "full" : "quick");
}

void gc_time_summary(int sweep_full, uint64_t start, uint64_t end,
@@ -968,11 +971,35 @@ void gc_time_summary(int sweep_full, uint64_t start, uint64_t end,
jl_safe_printf("TS: %" PRIu64 " Minor collection: estimate freed = %" PRIu64
" live = %" PRIu64 "m new interval = %" PRIu64 "m pause time = %"
PRIu64 "ms ttsp = %" PRIu64 "us mark time = %" PRIu64
"ms sweep time = %" PRIu64 "ms \n",
"ms sweep time = %" PRIu64 "ms\n",
end, freed, live/1024/1024,
interval/1024/1024, pause/1000000, ttsp,
mark/1000000,sweep/1000000);
}

void gc_heuristics_summary(
uint64_t old_alloc_diff, uint64_t alloc_mem,
uint64_t old_mut_time, uint64_t alloc_time,
uint64_t old_freed_diff, uint64_t gc_mem,
uint64_t old_pause_time, uint64_t gc_time,
int thrash_counter, const char *reason,
uint64_t current_heap, uint64_t target_heap)
{
jl_safe_printf("Estimates: alloc_diff=%" PRIu64 "kB (%" PRIu64 ")"
//" nongc_time=%" PRIu64 "ns (%" PRIu64 ")"
" mut_time=%" PRIu64 "ns (%" PRIu64 ")"
" freed_diff=%" PRIu64 "kB (%" PRIu64 ")"
" pause_time=%" PRIu64 "ns (%" PRIu64 ")"
" thrash_counter=%d%s"
" current_heap=%" PRIu64 " MB"
" target_heap=%" PRIu64 " MB\n",
old_alloc_diff/1024, alloc_mem/1024,
old_mut_time/1000, alloc_time/1000,
old_freed_diff/1024, gc_mem/1024,
old_pause_time/1000, gc_time/1000,
thrash_counter, reason,
current_heap/1024/1024, target_heap/1024/1024);
}
#endif

void jl_gc_debug_init(void)
@@ -1216,15 +1243,25 @@ JL_DLLEXPORT void jl_enable_gc_logging(int enable) {
gc_logging_enabled = enable;
}

void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT {
void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT {
if (!gc_logging_enabled) {
return;
}
jl_safe_printf("GC: pause %.2fms. collected %fMB. %s %s\n",
pause/1e6, freed/1e6,
jl_safe_printf("\nGC: pause %.2fms. collected %fMB. %s %s\n",
pause/1e6, freed/(double)(1<<20),
full ? "full" : "incr",
recollect ? "recollect" : ""
);

jl_safe_printf("Heap stats: bytes_mapped %.2f MB, bytes_resident %.2f MB,\nheap_size %.2f MB, heap_target %.2f MB, Fragmentation %.3f\n",
jl_atomic_load_relaxed(&gc_heap_stats.bytes_mapped)/(double)(1<<20),
jl_atomic_load_relaxed(&gc_heap_stats.bytes_resident)/(double)(1<<20),
// live_bytes/(double)(1<<20), live byes tracking is not accurate.
jl_atomic_load_relaxed(&gc_heap_stats.heap_size)/(double)(1<<20),
jl_atomic_load_relaxed(&gc_heap_stats.heap_target)/(double)(1<<20),
(double)live_bytes/(double)jl_atomic_load_relaxed(&gc_heap_stats.heap_size)
);
// Should fragmentation use bytes_resident instead of heap_size?
}

#ifdef __cplusplus
4 changes: 4 additions & 0 deletions src/gc-pages.c
@@ -83,6 +83,8 @@ char *jl_gc_try_alloc_pages_(int pg_cnt) JL_NOTSAFEPOINT
// round data pointer up to the nearest gc_page_data-aligned
// boundary if mmap didn't already do so.
mem = (char*)gc_page_data(mem + GC_PAGE_SZ - 1);
jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mapped, pages_sz);
jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, pages_sz);
return mem;
}

@@ -138,6 +140,7 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
// try to get page from `pool_freed`
meta = pop_lf_back(&global_page_pool_freed);
if (meta != NULL) {
jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, GC_PAGE_SZ);
gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED);
goto exit;
}
@@ -213,6 +216,7 @@ void jl_gc_free_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
#endif
msan_unpoison(p, decommit_size);
jl_atomic_fetch_add(&current_pg_count, -1);
jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, -decommit_size);
}

#ifdef __cplusplus
