From 848ae2c6491c909ec0bb93cb1b347927349cb6a6 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Thu, 16 Nov 2023 17:24:01 -0500 Subject: [PATCH 1/5] free more thread state in jl_delete_thread and GC --- src/gc-stacks.c | 3 +++ src/gc.c | 28 ++++++++++++++++++++++++++++ src/threading.c | 5 +++++ src/work-stealing-queue.h | 6 ++++++ 4 files changed, 42 insertions(+) diff --git a/src/gc-stacks.c b/src/gc-stacks.c index 0318162289f11..7af1017cb55d9 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -223,6 +223,9 @@ void sweep_stack_pools(void) void *stk = small_arraylist_pop(al); free_stack(stk, pool_sizes[p]); } + if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { + small_arraylist_free(al); + } } small_arraylist_t *live_tasks = &ptls2->heap.live_tasks; diff --git a/src/gc.c b/src/gc.c index c2d194b8a28bb..2b237ca95fe49 100644 --- a/src/gc.c +++ b/src/gc.c @@ -3775,6 +3775,22 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) else { ptls2->heap.remset->len = 0; } + // free empty GC state for threads that have exited + if (ptls2->current_task == NULL) { + jl_thread_heap_t *heap = &ptls2->heap; + if (heap->weak_refs.len == 0) + small_arraylist_free(&heap->weak_refs); + if (heap->live_tasks.len == 0) + small_arraylist_free(&heap->live_tasks); + if (heap->remset->len == 0) + arraylist_free(heap->remset); + if (heap->last_remset->len == 0) + arraylist_free(heap->last_remset); + if (ptls2->finalizers.len == 0) + arraylist_free(&ptls->finalizers); + if (ptls2->sweep_objs.len == 0) + arraylist_free(&ptls->sweep_objs); + } } #ifdef __GLIBC__ @@ -3993,6 +4009,18 @@ void jl_init_thread_heap(jl_ptls_t ptls) jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); } +void jl_free_thread_gc_state(jl_ptls_t ptls) +{ + jl_gc_markqueue_t *mq = &ptls->mark_queue; + ws_queue_t *cq = &mq->chunk_queue; + free_ws_array(jl_atomic_load_relaxed(&cq->array)); + jl_atomic_store_relaxed(&cq->array, NULL); + ws_queue_t *q = &mq->ptr_queue; + free_ws_array(jl_atomic_load_relaxed(&q->array)); + jl_atomic_store_relaxed(&q->array, NULL); + arraylist_free(&mq->reclaim_set); +} + // System-wide initializations void jl_gc_init(void) { diff --git a/src/threading.c b/src/threading.c index dcfc6ac7e93bb..97b2a247cd202 100644 --- a/src/threading.c +++ b/src/threading.c @@ -508,6 +508,11 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER #else pthread_mutex_unlock(&in_signal_lock); #endif + free(ptls->bt_data); + // allow the page root_task is on to be freed + ptls->root_task = NULL; + void jl_free_thread_gc_state(jl_ptls_t ptls); + jl_free_thread_gc_state(ptls); // then park in safe-region (void)jl_gc_safe_enter(ptls); } diff --git a/src/work-stealing-queue.h b/src/work-stealing-queue.h index 5902c2ac6af9f..73c6e34e36de8 100644 --- a/src/work-stealing-queue.h +++ b/src/work-stealing-queue.h @@ -34,6 +34,12 @@ static inline ws_array_t *create_ws_array(size_t capacity, int32_t eltsz) JL_NOT return a; } +static inline void free_ws_array(ws_array_t *a) +{ + free(a->buffer); + free(a); +} + typedef struct { _Atomic(int64_t) top; char _padding[JL_CACHE_BYTE_ALIGNMENT - sizeof(_Atomic(int64_t))]; From a125bc22c7ec2885dbc424631d597e9b322d2673 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Tue, 21 Nov 2023 17:55:51 -0500 Subject: [PATCH 2/5] free even more state for exited threads --- src/gc-stacks.c | 5 +++++ src/gc.c | 21 ++++++++++++++++++--- src/threading.c | 5 ++++- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/gc-stacks.c b/src/gc-stacks.c index 7af1017cb55d9..eb627ec39d409 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -203,6 +203,8 @@ void sweep_stack_pools(void) assert(gc_n_threads); for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; + if (ptls2 == NULL) + continue; // free half of stacks that remain unused since last sweep for (int p = 0; p < JL_N_STACK_POOLS; p++) { @@ -227,6 +229,9 @@ void sweep_stack_pools(void) small_arraylist_free(al); } } + if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { + small_arraylist_free(ptls2->heap.free_stacks); + } small_arraylist_t *live_tasks = &ptls2->heap.live_tasks; size_t n = 0; diff --git a/src/gc.c b/src/gc.c index 2b237ca95fe49..9edf863e28216 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1746,6 +1746,19 @@ void gc_free_pages(void) } } +void gc_move_to_global_page_pool(jl_gc_page_stack_t *pgstack) +{ + while (1) { + jl_gc_pagemeta_t *pg = pop_lf_back(pgstack); + if (pg == NULL) { + break; + } + jl_gc_free_page(pg); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -GC_PAGE_SZ); + push_lf_back(&global_page_pool_freed, pg); + } +} + // setup the data-structures for a sweep over all memory pools static void gc_sweep_pool(void) { @@ -3776,7 +3789,8 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) ptls2->heap.remset->len = 0; } // free empty GC state for threads that have exited - if (ptls2->current_task == NULL) { + if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL && + (ptls->tid < gc_first_tid || ptls2->tid >= gc_first_tid + jl_n_gcthreads)) { jl_thread_heap_t *heap = &ptls2->heap; if (heap->weak_refs.len == 0) small_arraylist_free(&heap->weak_refs); @@ -3787,9 +3801,10 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) if (heap->last_remset->len == 0) arraylist_free(heap->last_remset); if (ptls2->finalizers.len == 0) - arraylist_free(&ptls->finalizers); + arraylist_free(&ptls2->finalizers); if (ptls2->sweep_objs.len == 0) - arraylist_free(&ptls->sweep_objs); + arraylist_free(&ptls2->sweep_objs); + gc_move_to_global_page_pool(&ptls2->page_metadata_buffered); } } diff --git a/src/threading.c b/src/threading.c index 97b2a247cd202..2eecb59db1f17 100644 --- a/src/threading.c +++ b/src/threading.c @@ -435,6 +435,8 @@ JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void) void jl_task_frame_noreturn(jl_task_t *ct) JL_NOTSAFEPOINT; void scheduler_delete_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT; +void jl_free_thread_gc_state(jl_ptls_t ptls); + static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER { #ifndef _OS_WINDOWS_ @@ -509,9 +511,10 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER pthread_mutex_unlock(&in_signal_lock); #endif free(ptls->bt_data); + small_arraylist_free(&ptls->locks); + ptls->previous_exception = NULL; // allow the page root_task is on to be freed ptls->root_task = NULL; - void jl_free_thread_gc_state(jl_ptls_t ptls); jl_free_thread_gc_state(ptls); // then park in safe-region (void)jl_gc_safe_enter(ptls); From 138aba7320c4c37b1ad6330ec46531077f8ee516 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Fri, 23 Feb 2024 17:04:29 -0500 Subject: [PATCH 3/5] improve `--heap-size-hint` arg handling (#48050) Previously, `--heap-size-hint` would silently ignore many flavors of "bad" input, parsing things like "3PB" as 3 bytes. This change makes it significantly less permissive, erroring unless it can parse a number (still relying on the C `sscanf` `%Lf` format specifier there) with an optional unit (case-insensitive, either with or without the trailing `b`). Also test it. --- doc/man/julia.1 | 7 +- doc/src/manual/command-line-interface.md | 2 +- src/jloptions.c | 90 ++++++++++++++---------- test/cmdlineargs.jl | 16 +++++ 4 files changed, 73 insertions(+), 42 deletions(-) diff --git a/doc/man/julia.1 b/doc/man/julia.1 index c58b00120b7ec..afde0326e4952 100644 --- a/doc/man/julia.1 +++ b/doc/man/julia.1 @@ -228,9 +228,10 @@ fallbacks to the latest compatible BugReporting.jl if not. For more information, --bug-report=help. .TP ---heap-size-hint= -Forces garbage collection if memory usage is higher than that value. The value can be -specified in units of K, M, G, T, or % of physical memory. +--heap-size-hint= +Forces garbage collection if memory usage is higher than the given value. +The value may be specified as a number of bytes, optionally in units of +KB, MB, GB, or TB, or as a percentage of physical memory with %. .TP --compile={yes*|no|all|min} diff --git a/doc/src/manual/command-line-interface.md b/doc/src/manual/command-line-interface.md index 4faa3e8d3fc05..448964bf1ef8a 100644 --- a/doc/src/manual/command-line-interface.md +++ b/doc/src/manual/command-line-interface.md @@ -213,7 +213,7 @@ The following is a complete list of command-line switches available when launchi |`--output-incremental={yes\|no*}` |Generate an incremental output file (rather than complete)| |`--trace-compile={stderr,name}` |Print precompile statements for methods compiled during execution or save to a path| |`--image-codegen` |Force generate code in imaging mode| -|`--heap-size-hint=` |Forces garbage collection if memory usage is higher than that value. The memory hint might be specified in megabytes (e.g., 500M) or gigabytes (e.g., 1G)| +|`--heap-size-hint=` |Forces garbage collection if memory usage is higher than the given value. The value may be specified as a number of bytes, optionally in units of KB, MB, GB, or TB, or as a percentage of physical memory with %.| !!! compat "Julia 1.1" diff --git a/src/jloptions.c b/src/jloptions.c index 4b3f7209dc56d..570d021351104 100644 --- a/src/jloptions.c +++ b/src/jloptions.c @@ -8,6 +8,7 @@ #include #include + #include "julia_assert.h" #ifdef _OS_WINDOWS_ @@ -18,6 +19,15 @@ char *shlib_ext = ".dylib"; char *shlib_ext = ".so"; #endif +/* This simple hand-crafted tolower exists to avoid locale-dependent effects in + * behaviors (and utf8proc_tolower wasn't linking properly on all platforms) */ +static char ascii_tolower(char c) +{ + if ('A' <= c && c <= 'Z') + return c - 'A' + 'a'; + return c; +} + static const char system_image_path[256] = "\0" JL_SYSTEM_IMAGE_PATH; JL_DLLEXPORT const char *jl_get_default_sysimg_path(void) { @@ -187,9 +197,9 @@ static const char opts[] = " expressions. It first tries to use BugReporting.jl installed in current environment and\n" " fallbacks to the latest compatible BugReporting.jl if not. For more information, see\n" " --bug-report=help.\n\n" - - " --heap-size-hint= Forces garbage collection if memory usage is higher than that value.\n" - " The value can be specified in units of K, M, G, T, or % of physical memory.\n\n" + " --heap-size-hint= Forces garbage collection if memory usage is higher than the given value.\n" + " The value may be specified as a number of bytes, optionally in units of\n" + " KB, MB, GB, or TB, or as a percentage of physical memory with %.\n\n" ; static const char opts_hidden[] = @@ -801,43 +811,47 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) break; case opt_heap_size_hint: if (optarg != NULL) { - size_t endof = strlen(optarg); long double value = 0.0; - if (sscanf(optarg, "%Lf", &value) == 1 && value > 1e-7) { - char unit = optarg[endof - 1]; - uint64_t multiplier = 1ull; - switch (unit) { - case 'k': - case 'K': - multiplier <<= 10; - break; - case 'm': - case 'M': - multiplier <<= 20; - break; - case 'g': - case 'G': - multiplier <<= 30; - break; - case 't': - case 'T': - multiplier <<= 40; - break; - case '%': - if (value > 100) - jl_errorf("julia: invalid percentage specified in --heap-size-hint"); - uint64_t mem = uv_get_total_memory(); - uint64_t cmem = uv_get_constrained_memory(); - if (cmem > 0 && cmem < mem) - mem = cmem; - multiplier = mem/100; - break; - default: - jl_errorf("julia: invalid unit specified in --heap-size-hint"); - break; - } - jl_options.heap_size_hint = (uint64_t)(value * multiplier); + char unit[4] = {0}; + int nparsed = sscanf(optarg, "%Lf%3s", &value, unit); + if (nparsed == 0 || strlen(unit) > 2 || (strlen(unit) == 2 && ascii_tolower(unit[1]) != 'b')) { + jl_errorf("julia: invalid argument to --heap-size-hint (%s)", optarg); + } + uint64_t multiplier = 1ull; + switch (ascii_tolower(unit[0])) { + case '\0': + case 'b': + break; + case 'k': + multiplier <<= 10; + break; + case 'm': + multiplier <<= 20; + break; + case 'g': + multiplier <<= 30; + break; + case 't': + multiplier <<= 40; + break; + case '%': + if (value > 100) + jl_errorf("julia: invalid percentage specified in --heap-size-hint"); + uint64_t mem = uv_get_total_memory(); + uint64_t cmem = uv_get_constrained_memory(); + if (cmem > 0 && cmem < mem) + mem = cmem; + multiplier = mem/100; + break; + default: + jl_errorf("julia: invalid argument to --heap-size-hint (%s)", optarg); + break; + } + long double sz = value * multiplier; + if (isnan(sz) || sz < 0) { + jl_errorf("julia: invalid argument to --heap-size-hint (%s)", optarg); } + jl_options.heap_size_hint = sz < UINT64_MAX ? (uint64_t)sz : UINT64_MAX; } if (jl_options.heap_size_hint == 0) jl_errorf("julia: invalid memory size specified in --heap-size-hint"); diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index 3af31965e63fc..498254bb26ce0 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -1148,3 +1148,19 @@ if Sys.islinux() && Sys.ARCH in (:i686, :x86_64) # rr is only available on these "_RR_TRACE_DIR" => temp_trace_dir); #=stderr, stdout=#)) end end + +@testset "--heap-size-hint" begin + exename = `$(Base.julia_cmd())` + @test errors_not_signals(`$exename --heap-size-hint -e "exit(0)"`) + @testset "--heap-size-hint=$str" for str in ["asdf","","0","1.2vb","b","GB","2.5GBÌ‚","1.2gb2","42gigabytes","5gig","2GiB","NaNt"] + @test errors_not_signals(`$exename --heap-size-hint=$str -e "exit(0)"`) + end + k = 1024 + m = 1024k + g = 1024m + t = 1024g + @testset "--heap-size-hint=$str" for (str, val) in [("1", 1), ("1e7", 1e7), ("2.5e7", 2.5e7), ("1MB", 1m), ("2.5g", 2.5g), ("1e4kB", 1e4k), + ("1e100", typemax(UInt64)), ("1e500g", typemax(UInt64)), ("1e-12t", 1), ("500000000b", 500000000)] + @test parse(UInt64,read(`$exename --heap-size-hint=$str -E "Base.JLOptions().heap_size_hint"`, String)) == val + end +end From 714c6d02f8adf970a0451fffb39d6be1d8b5b564 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Sat, 24 Feb 2024 20:26:49 -0500 Subject: [PATCH 4/5] clarify keyword arg method error message (#53460) This is a nice explanatory message, so I think we should make the wording less obscure. --- base/errorshow.jl | 2 +- test/errorshow.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/base/errorshow.jl b/base/errorshow.jl index f63c150336689..9236e7da5baa6 100644 --- a/base/errorshow.jl +++ b/base/errorshow.jl @@ -330,7 +330,7 @@ function showerror(io::IO, ex::MethodError) if ex.world == typemax(UInt) || isempty(kwargs) print(io, "\nThis error has been manually thrown, explicitly, so the method may exist but be intentionally marked as unimplemented.") else - print(io, "\nThis method may not support any kwargs.") + print(io, "\nThis method may not support any keyword arguments.") end elseif hasmethod(f, arg_types) && !hasmethod(f, arg_types, world=ex.world) curworld = get_world_counter() diff --git a/test/errorshow.jl b/test/errorshow.jl index 932122682983e..2b497ed3f5f1b 100644 --- a/test/errorshow.jl +++ b/test/errorshow.jl @@ -669,7 +669,7 @@ end str = sprint(Base.showerror, MethodError(Core.kwcall, ((; a=3.0), +, 1.0, 2.0), Base.get_world_counter())) @test startswith(str, "MethodError: no method matching +(::Float64, ::Float64; a::Float64)") - @test occursin("This method may not support any kwargs", str) + @test occursin("This method may not support any keyword arguments", str) @test_throws "MethodError: no method matching kwcall()" Core.kwcall() end From b8a0a3978ccf163ce8fc371a4882547dc1271d53 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sat, 24 Feb 2024 21:48:37 -0500 Subject: [PATCH 5/5] Prevent tainting native code loading from propagating (#53457) When we use options like code coverage, we can't use the native code present in the cache file since it is not instrumented. PR #52123 introduced the capability of skipping the native code during loading, but created the issue that subsequent packages could have an explicit or implicit dependency on the native code. PR #53439 tainted the current process by setting `use_sysimage_native_code`, but this flag is propagated to subprocesses and lead to a regression in test time. Move this to a process local flag to avoid the regression. In the future we might be able to change the calling convention for cross-image calls to `invoke(ci::CodeInstance, args...)` instead of `ci.fptr(args...)` to handle native code not being present. --------- Co-authored-by: Jameson Nash --- src/staticdata.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/staticdata.c b/src/staticdata.c index c8fd4053aaf73..261042b775c14 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -3066,6 +3066,11 @@ JL_DLLEXPORT void jl_set_sysimg_so(void *handle) extern void rebuild_image_blob_tree(void); extern void export_jl_small_typeof(void); +// When an image is loaded with ignore_native, all subsequent image loads must ignore +// native code in the cache-file since we can't gurantuee that there are no call edges +// into the native code of the image. See https://github.com/JuliaLang/julia/pull/52123#issuecomment-1959965395. +int IMAGE_NATIVE_CODE_TAINTED = 0; + static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl_array_t *depmods, uint64_t checksum, /* outputs */ jl_array_t **restored, jl_array_t **init_order, jl_array_t **extext_methods, jl_array_t **internal_methods, @@ -3092,9 +3097,10 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl // in --build mode only use sysimg data, not precompiled native code int imaging_mode = jl_generating_output() && !jl_options.incremental; - if (imaging_mode || jl_options.use_sysimage_native_code != JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES) { + if (imaging_mode || jl_options.use_sysimage_native_code != JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES || IMAGE_NATIVE_CODE_TAINTED) { memset(&image->fptrs, 0, sizeof(image->fptrs)); image->gvars_base = NULL; + IMAGE_NATIVE_CODE_TAINTED = 1; } // step 1: read section map @@ -3772,7 +3778,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j // Must disable using native code in possible downstream users of this code: // https://github.com/JuliaLang/julia/pull/52123#issuecomment-1959965395. // The easiest way to do that is to disable it in all of them. - jl_options.use_sysimage_native_code = JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_NO; + IMAGE_NATIVE_CODE_TAINTED = 1; } jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_handle, pkgimg_data, &pkgimage, *plen, depmods, completeinfo, pkgname, 0);