From fc89d14c639faec779956b4e3cd873c07bd4327b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 1 Jan 2025 14:13:01 -0800 Subject: [PATCH 1/5] Revert barrier-based LSan threading race workaround The extra "barrier" approach was too much code whose sole purpose was to work around a race that is not even ours (i.e. in LSan's teardown code). In preparation for queuing a solution taking a much-less-invasive approach, let's revert them. --- Makefile | 7 ------- builtin/grep.c | 8 -------- builtin/index-pack.c | 6 ------ ci/lib.sh | 1 - thread-utils.h | 17 ----------------- 5 files changed, 39 deletions(-) diff --git a/Makefile b/Makefile index 2c6dad8a7513be..97e8385b6643b9 100644 --- a/Makefile +++ b/Makefile @@ -141,10 +141,6 @@ include shared.mak # # Define NO_PTHREADS if you do not have or do not want to use Pthreads. # -# Define THREAD_BARRIER_PTHREAD if your system has pthread_barrier_t. Barrier -# support is optional and is only helpful when building with SANITIZE=leak, as -# it is used to eliminate some races in the leak-checker. -# # Define NO_PREAD if you have a problem with pread() system call (e.g. # cygwin1.dll before v1.5.22). # @@ -2083,9 +2079,6 @@ ifdef NO_PTHREADS else BASIC_CFLAGS += $(PTHREAD_CFLAGS) EXTLIBS += $(PTHREAD_LIBS) - ifdef THREAD_BARRIER_PTHREAD - BASIC_CFLAGS += -DTHREAD_BARRIER_PTHREAD - endif endif ifdef HAVE_PATHS_H diff --git a/builtin/grep.c b/builtin/grep.c index 61b2c27490980b..d00ee76f24cfde 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -101,9 +101,6 @@ static pthread_cond_t cond_write; /* Signalled when we are finished with everything. */ static pthread_cond_t cond_result; -/* Synchronize the start of all threads */ -static maybe_thread_barrier_t start_barrier; - static int skip_first_line; static void add_work(struct grep_opt *opt, struct grep_source *gs) @@ -201,8 +198,6 @@ static void *run(void *arg) int hit = 0; struct grep_opt *opt = arg; - maybe_thread_barrier_wait(&start_barrier); - while (1) { struct work_item *w = get_work(); if (!w) @@ -234,7 +229,6 @@ static void start_threads(struct grep_opt *opt) pthread_cond_init(&cond_add, NULL); pthread_cond_init(&cond_write, NULL); pthread_cond_init(&cond_result, NULL); - maybe_thread_barrier_init(&start_barrier, NULL, num_threads + 1); grep_use_locks = 1; enable_obj_read_lock(); @@ -254,7 +248,6 @@ static void start_threads(struct grep_opt *opt) die(_("grep: failed to create thread: %s"), strerror(err)); } - maybe_thread_barrier_wait(&start_barrier); } static int wait_all(void) @@ -291,7 +284,6 @@ static int wait_all(void) pthread_cond_destroy(&cond_add); pthread_cond_destroy(&cond_write); pthread_cond_destroy(&cond_result); - maybe_thread_barrier_destroy(&start_barrier); grep_use_locks = 0; disable_obj_read_lock(); diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 27b120f26c7a68..0b62b2589f10f9 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -185,8 +185,6 @@ static pthread_mutex_t deepest_delta_mutex; static pthread_key_t key; -static maybe_thread_barrier_t start_barrier; - static inline void lock_mutex(pthread_mutex_t *mutex) { if (threads_active) @@ -211,7 +209,6 @@ static void init_thread(void) if (show_stat) pthread_mutex_init(&deepest_delta_mutex, NULL); pthread_key_create(&key, NULL); - maybe_thread_barrier_init(&start_barrier, NULL, nr_threads); CALLOC_ARRAY(thread_data, nr_threads); for (i = 0; i < nr_threads; i++) { thread_data[i].pack_fd = xopen(curr_pack, O_RDONLY); @@ -234,7 +231,6 @@ static void cleanup_thread(void) for (i = 0; i < nr_threads; i++) close(thread_data[i].pack_fd); pthread_key_delete(key); - maybe_thread_barrier_destroy(&start_barrier); free(thread_data); } @@ -1104,8 +1100,6 @@ static int compare_ref_delta_entry(const void *a, const void *b) static void *threaded_second_pass(void *data) { - if (threads_active) - maybe_thread_barrier_wait(&start_barrier); if (data) set_thread_data(data); for (;;) { diff --git a/ci/lib.sh b/ci/lib.sh index 6a1267fbcb3c8b..8885ee3c3f86c6 100755 --- a/ci/lib.sh +++ b/ci/lib.sh @@ -385,7 +385,6 @@ linux-musl) ;; linux-leaks|linux-reftable-leaks) export SANITIZE=leak - export THREAD_BARRIER_PTHREAD=1 ;; linux-asan-ubsan) export SANITIZE=address,undefined diff --git a/thread-utils.h b/thread-utils.h index 3df5be9916de37..4961487ed914f4 100644 --- a/thread-utils.h +++ b/thread-utils.h @@ -53,22 +53,5 @@ int dummy_pthread_init(void *); int online_cpus(void); int init_recursive_mutex(pthread_mutex_t*); -#ifdef THREAD_BARRIER_PTHREAD -#define maybe_thread_barrier_t pthread_barrier_t -#define maybe_thread_barrier_init pthread_barrier_init -#define maybe_thread_barrier_wait pthread_barrier_wait -#define maybe_thread_barrier_destroy pthread_barrier_destroy -#else -#define maybe_thread_barrier_t int -static inline int maybe_thread_barrier_init(maybe_thread_barrier_t *b UNUSED, - void *attr UNUSED, - unsigned nr UNUSED) -{ - errno = ENOSYS; - return -1; -} -#define maybe_thread_barrier_wait(barrier) -#define maybe_thread_barrier_destroy(barrier) -#endif #endif /* THREAD_COMPAT_H */ From 5fa0c4dd296d3731bbbd1977d7bf9c50d8c4b7c1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 1 Jan 2025 15:14:44 -0500 Subject: [PATCH 2/5] test-lib: rely on logs to detect leaks When we run with sanitizers, we set abort_on_error=1 so that the tests themselves can detect problems directly (when the buggy program exits with SIGABRT). This has one blind spot, though: we don't always check the exit codes for all programs (e.g., helpers like upload-pack invoked behind the scenes). For ASan and UBSan this is mostly fine; they exit as soon as they see an error, so the unexpected abort of the program causes the test to fail anyway. But for LSan, the program runs to completion, since we can only check for leaks at the end. And in that case we could miss leak reports. And thus we started checking LSan logs in faececa53f (test-lib: have the "check" mode for SANITIZE=leak consider leak logs, 2022-07-28). Originally the logs were optional, but logs are generated (and checked) always as of 8c1d6691bc (test-lib: GIT_TEST_SANITIZE_LEAK_LOG enabled by default, 2024-07-11). And we even check them for each test snippet, as of cf1464331b (test-lib: check for leak logs after every test, 2024-09-24). So now aborting on error is superfluous for LSan! We can get everything we need by checking the logs. And checking the logs is actually preferable, since it gives us more control over silencing false positives (something we do not yet do, but will soon). So let's tell LSan to just exit normally, even if it finds leaks. We can do so with exitcode=0, which also suppresses the abort_on_error flag. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/test-lib.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/t/test-lib.sh b/t/test-lib.sh index 96f2dfb69de48c..dd2ba6e6cc4c91 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -80,6 +80,7 @@ prepend_var ASAN_OPTIONS : detect_leaks=0 export ASAN_OPTIONS prepend_var LSAN_OPTIONS : $GIT_SAN_OPTIONS +prepend_var LSAN_OPTIONS : exitcode=0 prepend_var LSAN_OPTIONS : fast_unwind_on_malloc=0 export LSAN_OPTIONS From 373a4326961c504ad6365fc1e4a9082e387499c7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 1 Jan 2025 15:17:21 -0500 Subject: [PATCH 3/5] test-lib: simplify leak-log checking We have a function to count the number of leaks found (actually, it is the number of processes which produced a log file). Once upon a time we cared about seeing if this number increased between runs. But we simplified that away in 95c679ad86 (test-lib: stop showing old leak logs, 2024-09-24), and now we only care if it returns any results or not. In preparation for refactoring it further, let's drop the counting function entirely, and roll it into the "is it empty" check. The outcome should be the same, but we'll be free to return a boolean "did we find anything" without worrying about somebody adding a new call to the counting function. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/test-lib.sh | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/t/test-lib.sh b/t/test-lib.sh index dd2ba6e6cc4c91..23eb26bfbe9c1b 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -340,17 +340,6 @@ case "$TRASH_DIRECTORY" in *) TRASH_DIRECTORY="$TEST_OUTPUT_DIRECTORY/$TRASH_DIRECTORY" ;; esac -# Utility functions using $TEST_RESULTS_* variables -nr_san_dir_leaks_ () { - # stderr piped to /dev/null because the directory may have - # been "rmdir"'d already. - find "$TEST_RESULTS_SAN_DIR" \ - -type f \ - -name "$TEST_RESULTS_SAN_FILE_PFX.*" 2>/dev/null | - xargs grep -lv "Unable to get registers from thread" | - wc -l -} - # If --stress was passed, run this test repeatedly in several parallel loops. if test "$GIT_TEST_STRESS_STARTED" = "done" then @@ -1181,8 +1170,14 @@ test_atexit_handler () { } check_test_results_san_file_empty_ () { - test -z "$TEST_RESULTS_SAN_FILE" || - test "$(nr_san_dir_leaks_)" = 0 + test -z "$TEST_RESULTS_SAN_FILE" && return 0 + + # stderr piped to /dev/null because the directory may have + # been "rmdir"'d already. + ! find "$TEST_RESULTS_SAN_DIR" \ + -type f \ + -name "$TEST_RESULTS_SAN_FILE_PFX.*" 2>/dev/null | + xargs grep -qv "Unable to get registers from thread" } check_test_results_san_file_ () { From 6fb8cb3d685382089a2e34ba35a30e898d63ab26 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 1 Jan 2025 15:18:28 -0500 Subject: [PATCH 4/5] test-lib: check leak logs for presence of DEDUP_TOKEN When we check the leak logs, our original strategy was to check for any non-empty log file produced by LSan. We later amended that to ignore noisy lines in 370ef7e40d (test-lib: ignore uninteresting LSan output, 2023-08-28). This makes it hard to ignore noise which is more than a single line; we'd have to actually parse the file to determine the meaning of each line. But there's an easy line-oriented solution. Because we always pass the dedup_token_length option, the output will contain a DEDUP_TOKEN line for each leak that has been found. So if we invert our strategy to stop ignoring useless lines and only look for useful ones, we can just count the number of DEDUP_TOKEN lines. If it's non-zero, then we found at least one leak (it would even give us a count of unique leaks, but we really only care if it is non-zero). This should yield the same outcome, but will help us build more false positive detection on top. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/test-lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/test-lib.sh b/t/test-lib.sh index 23eb26bfbe9c1b..c9487d080518f5 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1177,7 +1177,7 @@ check_test_results_san_file_empty_ () { ! find "$TEST_RESULTS_SAN_DIR" \ -type f \ -name "$TEST_RESULTS_SAN_FILE_PFX.*" 2>/dev/null | - xargs grep -qv "Unable to get registers from thread" + xargs grep -q ^DEDUP_TOKEN } check_test_results_san_file_ () { From b119a687d411864433aed92017c144d311b53a4c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 1 Jan 2025 15:21:24 -0500 Subject: [PATCH 5/5] test-lib: ignore leaks in the sanitizer's thread code Our CI jobs sometimes see false positive leaks like this: ================================================================= ==3904583==ERROR: LeakSanitizer: detected memory leaks Direct leak of 32 byte(s) in 1 object(s) allocated from: #0 0x7fa790d01986 in __interceptor_realloc ../../../../src/libsanitizer/lsan/lsan_interceptors.cpp:98 #1 0x7fa790add769 in __pthread_getattr_np nptl/pthread_getattr_np.c:180 #2 0x7fa790d117c5 in __sanitizer::GetThreadStackTopAndBottom(bool, unsigned long*, unsigned long*) ../../../../src/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp:150 #3 0x7fa790d11957 in __sanitizer::GetThreadStackAndTls(bool, unsigned long*, unsigned long*, unsigned long*, unsigned long*) ../../../../src/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp:598 #4 0x7fa790d03fe8 in __lsan::ThreadStart(unsigned int, unsigned long long, __sanitizer::ThreadType) ../../../../src/libsanitizer/lsan/lsan_posix.cpp:51 #5 0x7fa790d013fd in __lsan_thread_start_func ../../../../src/libsanitizer/lsan/lsan_interceptors.cpp:440 #6 0x7fa790adc3eb in start_thread nptl/pthread_create.c:444 #7 0x7fa790b5ca5b in clone3 ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 This is not a leak in our code, but appears to be a race between one thread calling exit() while another one is in LSan's stack setup code. You can reproduce it easily by running t0003 or t5309 with --stress (these trigger it because of the threading in git-grep and index-pack respectively). This may be a bug in LSan, but regardless of whether it is eventually fixed, it is useful to work around it so that we stop seeing these false positives. We can recognize it by the mention of the sanitizer functions in the DEDUP_TOKEN line. With this patch, the scripts mentioned above should run with --stress indefinitely. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/test-lib.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/t/test-lib.sh b/t/test-lib.sh index c9487d080518f5..d1f62adbf82931 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1177,7 +1177,8 @@ check_test_results_san_file_empty_ () { ! find "$TEST_RESULTS_SAN_DIR" \ -type f \ -name "$TEST_RESULTS_SAN_FILE_PFX.*" 2>/dev/null | - xargs grep -q ^DEDUP_TOKEN + xargs grep ^DEDUP_TOKEN | + grep -qv sanitizer::GetThreadStackTopAndBottom } check_test_results_san_file_ () {