From fc89d14c639faec779956b4e3cd873c07bd4327b Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Wed, 1 Jan 2025 14:13:01 -0800
Subject: [PATCH 1/5] Revert barrier-based LSan threading race workaround

The extra "barrier" approach was too much code whose sole purpose
was to work around a race that is not even ours (i.e. in LSan's
teardown code).

In preparation for queuing a solution taking a much-less-invasive
approach, let's revert them.
---
 Makefile             |  7 -------
 builtin/grep.c       |  8 --------
 builtin/index-pack.c |  6 ------
 ci/lib.sh            |  1 -
 thread-utils.h       | 17 -----------------
 5 files changed, 39 deletions(-)

diff --git a/Makefile b/Makefile
index 2c6dad8a7513be..97e8385b6643b9 100644
--- a/Makefile
+++ b/Makefile
@@ -141,10 +141,6 @@ include shared.mak
 #
 # Define NO_PTHREADS if you do not have or do not want to use Pthreads.
 #
-# Define THREAD_BARRIER_PTHREAD if your system has pthread_barrier_t. Barrier
-# support is optional and is only helpful when building with SANITIZE=leak, as
-# it is used to eliminate some races in the leak-checker.
-#
 # Define NO_PREAD if you have a problem with pread() system call (e.g.
 # cygwin1.dll before v1.5.22).
 #
@@ -2083,9 +2079,6 @@ ifdef NO_PTHREADS
 else
 	BASIC_CFLAGS += $(PTHREAD_CFLAGS)
 	EXTLIBS += $(PTHREAD_LIBS)
-	ifdef THREAD_BARRIER_PTHREAD
-		BASIC_CFLAGS += -DTHREAD_BARRIER_PTHREAD
-	endif
 endif
 
 ifdef HAVE_PATHS_H
diff --git a/builtin/grep.c b/builtin/grep.c
index 61b2c27490980b..d00ee76f24cfde 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -101,9 +101,6 @@ static pthread_cond_t cond_write;
 /* Signalled when we are finished with everything. */
 static pthread_cond_t cond_result;
 
-/* Synchronize the start of all threads */
-static maybe_thread_barrier_t start_barrier;
-
 static int skip_first_line;
 
 static void add_work(struct grep_opt *opt, struct grep_source *gs)
@@ -201,8 +198,6 @@ static void *run(void *arg)
 	int hit = 0;
 	struct grep_opt *opt = arg;
 
-	maybe_thread_barrier_wait(&start_barrier);
-
 	while (1) {
 		struct work_item *w = get_work();
 		if (!w)
@@ -234,7 +229,6 @@ static void start_threads(struct grep_opt *opt)
 	pthread_cond_init(&cond_add, NULL);
 	pthread_cond_init(&cond_write, NULL);
 	pthread_cond_init(&cond_result, NULL);
-	maybe_thread_barrier_init(&start_barrier, NULL, num_threads + 1);
 	grep_use_locks = 1;
 	enable_obj_read_lock();
 
@@ -254,7 +248,6 @@ static void start_threads(struct grep_opt *opt)
 			die(_("grep: failed to create thread: %s"),
 			    strerror(err));
 	}
-	maybe_thread_barrier_wait(&start_barrier);
 }
 
 static int wait_all(void)
@@ -291,7 +284,6 @@ static int wait_all(void)
 	pthread_cond_destroy(&cond_add);
 	pthread_cond_destroy(&cond_write);
 	pthread_cond_destroy(&cond_result);
-	maybe_thread_barrier_destroy(&start_barrier);
 	grep_use_locks = 0;
 	disable_obj_read_lock();
 
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 27b120f26c7a68..0b62b2589f10f9 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -185,8 +185,6 @@ static pthread_mutex_t deepest_delta_mutex;
 
 static pthread_key_t key;
 
-static maybe_thread_barrier_t start_barrier;
-
 static inline void lock_mutex(pthread_mutex_t *mutex)
 {
 	if (threads_active)
@@ -211,7 +209,6 @@ static void init_thread(void)
 	if (show_stat)
 		pthread_mutex_init(&deepest_delta_mutex, NULL);
 	pthread_key_create(&key, NULL);
-	maybe_thread_barrier_init(&start_barrier, NULL, nr_threads);
 	CALLOC_ARRAY(thread_data, nr_threads);
 	for (i = 0; i < nr_threads; i++) {
 		thread_data[i].pack_fd = xopen(curr_pack, O_RDONLY);
@@ -234,7 +231,6 @@ static void cleanup_thread(void)
 	for (i = 0; i < nr_threads; i++)
 		close(thread_data[i].pack_fd);
 	pthread_key_delete(key);
-	maybe_thread_barrier_destroy(&start_barrier);
 	free(thread_data);
 }
 
@@ -1104,8 +1100,6 @@ static int compare_ref_delta_entry(const void *a, const void *b)
 
 static void *threaded_second_pass(void *data)
 {
-	if (threads_active)
-		maybe_thread_barrier_wait(&start_barrier);
 	if (data)
 		set_thread_data(data);
 	for (;;) {
diff --git a/ci/lib.sh b/ci/lib.sh
index 6a1267fbcb3c8b..8885ee3c3f86c6 100755
--- a/ci/lib.sh
+++ b/ci/lib.sh
@@ -385,7 +385,6 @@ linux-musl)
 	;;
 linux-leaks|linux-reftable-leaks)
 	export SANITIZE=leak
-	export THREAD_BARRIER_PTHREAD=1
 	;;
 linux-asan-ubsan)
 	export SANITIZE=address,undefined
diff --git a/thread-utils.h b/thread-utils.h
index 3df5be9916de37..4961487ed914f4 100644
--- a/thread-utils.h
+++ b/thread-utils.h
@@ -53,22 +53,5 @@ int dummy_pthread_init(void *);
 int online_cpus(void);
 int init_recursive_mutex(pthread_mutex_t*);
 
-#ifdef THREAD_BARRIER_PTHREAD
-#define maybe_thread_barrier_t pthread_barrier_t
-#define maybe_thread_barrier_init pthread_barrier_init
-#define maybe_thread_barrier_wait pthread_barrier_wait
-#define maybe_thread_barrier_destroy pthread_barrier_destroy
-#else
-#define maybe_thread_barrier_t int
-static inline int maybe_thread_barrier_init(maybe_thread_barrier_t *b UNUSED,
-					    void *attr UNUSED,
-					    unsigned nr UNUSED)
-{
-	errno = ENOSYS;
-	return -1;
-}
-#define maybe_thread_barrier_wait(barrier)
-#define maybe_thread_barrier_destroy(barrier)
-#endif
 
 #endif /* THREAD_COMPAT_H */

From 5fa0c4dd296d3731bbbd1977d7bf9c50d8c4b7c1 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 1 Jan 2025 15:14:44 -0500
Subject: [PATCH 2/5] test-lib: rely on logs to detect leaks

When we run with sanitizers, we set abort_on_error=1 so that the tests
themselves can detect problems directly (when the buggy program exits
with SIGABRT). This has one blind spot, though: we don't always check
the exit codes for all programs (e.g., helpers like upload-pack invoked
behind the scenes).

For ASan and UBSan this is mostly fine; they exit as soon as they see an
error, so the unexpected abort of the program causes the test to fail
anyway.

But for LSan, the program runs to completion, since we can only check
for leaks at the end. And in that case we could miss leak reports. And
thus we started checking LSan logs in faececa53f (test-lib: have the
"check" mode for SANITIZE=leak consider leak logs, 2022-07-28).
Originally the logs were optional, but logs are generated (and checked)
always as of 8c1d6691bc (test-lib: GIT_TEST_SANITIZE_LEAK_LOG enabled by
default, 2024-07-11). And we even check them for each test snippet, as
of cf1464331b (test-lib: check for leak logs after every test,
2024-09-24).

So now aborting on error is superfluous for LSan! We can get everything
we need by checking the logs. And checking the logs is actually
preferable, since it gives us more control over silencing false
positives (something we do not yet do, but will soon).

So let's tell LSan to just exit normally, even if it finds leaks. We can
do so with exitcode=0, which also suppresses the abort_on_error flag.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/test-lib.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/t/test-lib.sh b/t/test-lib.sh
index 96f2dfb69de48c..dd2ba6e6cc4c91 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -80,6 +80,7 @@ prepend_var ASAN_OPTIONS : detect_leaks=0
 export ASAN_OPTIONS
 
 prepend_var LSAN_OPTIONS : $GIT_SAN_OPTIONS
+prepend_var LSAN_OPTIONS : exitcode=0
 prepend_var LSAN_OPTIONS : fast_unwind_on_malloc=0
 export LSAN_OPTIONS
 

From 373a4326961c504ad6365fc1e4a9082e387499c7 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 1 Jan 2025 15:17:21 -0500
Subject: [PATCH 3/5] test-lib: simplify leak-log checking

We have a function to count the number of leaks found (actually, it is
the number of processes which produced a log file). Once upon a time we
cared about seeing if this number increased between runs. But we
simplified that away in 95c679ad86 (test-lib: stop showing old leak
logs, 2024-09-24), and now we only care if it returns any results or
not.

In preparation for refactoring it further, let's drop the counting
function entirely, and roll it into the "is it empty" check. The outcome
should be the same, but we'll be free to return a boolean "did we find
anything" without worrying about somebody adding a new call to the
counting function.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/test-lib.sh | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/t/test-lib.sh b/t/test-lib.sh
index dd2ba6e6cc4c91..23eb26bfbe9c1b 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -340,17 +340,6 @@ case "$TRASH_DIRECTORY" in
  *) TRASH_DIRECTORY="$TEST_OUTPUT_DIRECTORY/$TRASH_DIRECTORY" ;;
 esac
 
-# Utility functions using $TEST_RESULTS_* variables
-nr_san_dir_leaks_ () {
-	# stderr piped to /dev/null because the directory may have
-	# been "rmdir"'d already.
-	find "$TEST_RESULTS_SAN_DIR" \
-		-type f \
-		-name "$TEST_RESULTS_SAN_FILE_PFX.*" 2>/dev/null |
-	xargs grep -lv "Unable to get registers from thread" |
-	wc -l
-}
-
 # If --stress was passed, run this test repeatedly in several parallel loops.
 if test "$GIT_TEST_STRESS_STARTED" = "done"
 then
@@ -1181,8 +1170,14 @@ test_atexit_handler () {
 }
 
 check_test_results_san_file_empty_ () {
-	test -z "$TEST_RESULTS_SAN_FILE" ||
-	test "$(nr_san_dir_leaks_)" = 0
+	test -z "$TEST_RESULTS_SAN_FILE" && return 0
+
+	# stderr piped to /dev/null because the directory may have
+	# been "rmdir"'d already.
+	! find "$TEST_RESULTS_SAN_DIR" \
+		-type f \
+		-name "$TEST_RESULTS_SAN_FILE_PFX.*" 2>/dev/null |
+	xargs grep -qv "Unable to get registers from thread"
 }
 
 check_test_results_san_file_ () {

From 6fb8cb3d685382089a2e34ba35a30e898d63ab26 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 1 Jan 2025 15:18:28 -0500
Subject: [PATCH 4/5] test-lib: check leak logs for presence of DEDUP_TOKEN

When we check the leak logs, our original strategy was to check for any
non-empty log file produced by LSan. We later amended that to ignore
noisy lines in 370ef7e40d (test-lib: ignore uninteresting LSan output,
2023-08-28).

This makes it hard to ignore noise which is more than a single line;
we'd have to actually parse the file to determine the meaning of each
line.

But there's an easy line-oriented solution. Because we always pass the
dedup_token_length option, the output will contain a DEDUP_TOKEN line
for each leak that has been found. So if we invert our strategy to stop
ignoring useless lines and only look for useful ones, we can just count
the number of DEDUP_TOKEN lines. If it's non-zero, then we found at
least one leak (it would even give us a count of unique leaks, but we
really only care if it is non-zero).

This should yield the same outcome, but will help us build more false
positive detection on top.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/test-lib.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/test-lib.sh b/t/test-lib.sh
index 23eb26bfbe9c1b..c9487d080518f5 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1177,7 +1177,7 @@ check_test_results_san_file_empty_ () {
 	! find "$TEST_RESULTS_SAN_DIR" \
 		-type f \
 		-name "$TEST_RESULTS_SAN_FILE_PFX.*" 2>/dev/null |
-	xargs grep -qv "Unable to get registers from thread"
+	xargs grep -q ^DEDUP_TOKEN
 }
 
 check_test_results_san_file_ () {

From b119a687d411864433aed92017c144d311b53a4c Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 1 Jan 2025 15:21:24 -0500
Subject: [PATCH 5/5] test-lib: ignore leaks in the sanitizer's thread code

Our CI jobs sometimes see false positive leaks like this:

        =================================================================
        ==3904583==ERROR: LeakSanitizer: detected memory leaks

        Direct leak of 32 byte(s) in 1 object(s) allocated from:
            #0 0x7fa790d01986 in __interceptor_realloc ../../../../src/libsanitizer/lsan/lsan_interceptors.cpp:98
            #1 0x7fa790add769 in __pthread_getattr_np nptl/pthread_getattr_np.c:180
            #2 0x7fa790d117c5 in __sanitizer::GetThreadStackTopAndBottom(bool, unsigned long*, unsigned long*) ../../../../src/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp:150
            #3 0x7fa790d11957 in __sanitizer::GetThreadStackAndTls(bool, unsigned long*, unsigned long*, unsigned long*, unsigned long*) ../../../../src/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp:598
            #4 0x7fa790d03fe8 in __lsan::ThreadStart(unsigned int, unsigned long long, __sanitizer::ThreadType) ../../../../src/libsanitizer/lsan/lsan_posix.cpp:51
            #5 0x7fa790d013fd in __lsan_thread_start_func ../../../../src/libsanitizer/lsan/lsan_interceptors.cpp:440
            #6 0x7fa790adc3eb in start_thread nptl/pthread_create.c:444
            #7 0x7fa790b5ca5b in clone3 ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81

This is not a leak in our code, but appears to be a race between one
thread calling exit() while another one is in LSan's stack setup code.
You can reproduce it easily by running t0003 or t5309 with --stress
(these trigger it because of the threading in git-grep and index-pack
respectively).

This may be a bug in LSan, but regardless of whether it is eventually
fixed, it is useful to work around it so that we stop seeing these false
positives.

We can recognize it by the mention of the sanitizer functions in the
DEDUP_TOKEN line. With this patch, the scripts mentioned above should
run with --stress indefinitely.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/test-lib.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/t/test-lib.sh b/t/test-lib.sh
index c9487d080518f5..d1f62adbf82931 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1177,7 +1177,8 @@ check_test_results_san_file_empty_ () {
 	! find "$TEST_RESULTS_SAN_DIR" \
 		-type f \
 		-name "$TEST_RESULTS_SAN_FILE_PFX.*" 2>/dev/null |
-	xargs grep -q ^DEDUP_TOKEN
+	xargs grep ^DEDUP_TOKEN |
+	grep -qv sanitizer::GetThreadStackTopAndBottom
 }
 
 check_test_results_san_file_ () {