From 48f3f820783a6c6114bb29efc76affc2388f91d4 Mon Sep 17 00:00:00 2001 From: Satya Bodapati Date: Tue, 2 Apr 2024 15:20:00 +0100 Subject: [PATCH] PXB-3113 : Improve debug sync framework to allow PXB to pause and resume threads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://perconadev.atlassian.net/browse/PXB-3113 The current debug-sync option in PXB completely suspends PXB process and user can resume by sending SIGCONT signal This is useful for scenarios where PXB is paused and do certain operations on server and then resume PXB to complete. But many bugs we found during testing, involves multiple threads in PXB. The goal of this work is to be able to pause and resume the thread. Since many tests use the existing debug-sync option, I dont want to disturb these tests. We can convert them to the new mechanism later. How to use? ----------- The new mechanism is used with option --debug-sync-thread="sync_point_name" In the code place a debug_sync_thread(“debug_point_1”) to stop thread at this place. You can pass the debug_sync point via commandline --debug-sync-thread=”debug_sync_point1” PXB will create a file of the debug_sync point name in the backup directory. It is suffixed with a threadnumber. Please ensure that no two debug_sync points use same name (it doesn’t make sense to have two sync points with same name) ``` 2024-03-28T15:58:23.310386-00:00 0 [Note] [MY-011825] [Xtrabackup] DEBUG_SYNC_THREAD: sleeping 1sec. Resume this thread by deleting file /home/satya/WORK/pxb/bld/backup//xb_before_file_copy_4860396430306702017 ``` In the test, after activating syncpoint, you can use wait_for_debug_sync_thread_point Do some stuff now. This thread is sleeping. Once you are done, and if you want the thread to resume, you can do so by deleting the file 'rm backup_dir/sync_point_name_*` Please use resume_debug_sync_thread_point . It dletes the syncpoint file and additionally checks that syncpoint is indeed resumed. More common/complicated scenario: ---------------------------------- The scenario is to signal another thread to stop after reaching the first sync point. To achieve this. Do steps 1 to 3 (above) Echo the debug_sync point name into a file named “xb_debug_sync_thread”. Example: 4. echo "xtrabackup_copy_logfile_pause" > backup/xb_debug_sync_thread 5. send SIGUSR1 signal to PXB process. kill -SIGUSR1 496102 6. Wait for syncpoint to be reached. wait_for_debug_sync_thread PXB acknowledges it 2024-03-28T16:05:07.849926-00:00 0 [Note] [MY-011825] [Xtrabackup] SIGUSR1 received. Reading debug_sync point from xb_debug_sync_thread file in backup directory 2024-03-28T16:05:07.850004-00:00 0 [Note] [MY-011825] [Xtrabackup] DEBUG_SYNC_THREAD: Deleting file/home/satya/WORK/pxb/bld/backup//xb_debug_sync_thread and then prints this once the sync point is reached. 2024-03-28T16:05:08.508830-00:00 1 [Note] [MY-011825] [Xtrabackup] DEBUG_SYNC_THREAD: sleeping 1sec. Resume this thread by deleting file /home/satya/WORK/pxb/bld/backup//xb_xtrabackup_copy_logfile_pause_10389933572825668634 At this point, we have two threads sleeping at two sync points. Either of them can be resumed by deleting the filenames mentioned in the error log. (Or use resume_debug_sync_thread()) --- storage/innobase/include/xb0xb.h | 5 + storage/innobase/xtrabackup/src/redo_log.cc | 1 + storage/innobase/xtrabackup/src/xtrabackup.cc | 101 ++++++++++++++++++ .../innobase/xtrabackup/test/inc/common.sh | 98 +++++++++++++++++ 4 files changed, 205 insertions(+) diff --git a/storage/innobase/include/xb0xb.h b/storage/innobase/include/xb0xb.h index f42df09b9ae8..0dd0074035a2 100644 --- a/storage/innobase/include/xb0xb.h +++ b/storage/innobase/include/xb0xb.h @@ -103,6 +103,11 @@ const std::string KEYRING_NOT_LOADED = @param[in] name sync point name */ void debug_sync_point(const char *name); +/** Pause xtrabackup thread and wait for resume. +Thread can be resumed by deleting the sync_point filename +@param[in] name sync point name */ +void debug_sync_thread(const char *name); + extern char *xtrabackup_debug_sync; #endif diff --git a/storage/innobase/xtrabackup/src/redo_log.cc b/storage/innobase/xtrabackup/src/redo_log.cc index 069608fafe29..13d4f329cc3a 100644 --- a/storage/innobase/xtrabackup/src/redo_log.cc +++ b/storage/innobase/xtrabackup/src/redo_log.cc @@ -1207,6 +1207,7 @@ void Redo_Log_Data_Manager::copy_func() { xb::info() << ">> log scanned up to (" << reader.get_scanned_lsn() << ")"; debug_sync_point("xtrabackup_copy_logfile_pause"); + debug_sync_thread("xtrabackup_copy_logfile_pause"); os_event_reset(event); os_event_wait_time_low(event, std::chrono::milliseconds{copy_interval}, diff --git a/storage/innobase/xtrabackup/src/xtrabackup.cc b/storage/innobase/xtrabackup/src/xtrabackup.cc index d40e8ce3af13..0f31a7eb438a 100644 --- a/storage/innobase/xtrabackup/src/xtrabackup.cc +++ b/storage/innobase/xtrabackup/src/xtrabackup.cc @@ -364,6 +364,10 @@ it every INNOBASE_WAKE_INTERVAL'th step. */ ulong innobase_active_counter = 0; char *xtrabackup_debug_sync = NULL; +#ifdef UNIV_DEBUG +char *xtrabackup_debug_sync_thread = NULL; +static std::string debug_sync_file_content; +#endif /* UNIV_DEBUG */ static const char *dbug_setting = nullptr; bool xtrabackup_incremental_force_scan = false; @@ -685,6 +689,9 @@ enum options_xtrabackup { OPT_INNODB_REDO_LOG_ENCRYPT, OPT_INNODB_UNDO_LOG_ENCRYPT, OPT_XTRA_DEBUG_SYNC, +#ifdef UNIV_DEBUG + OPT_XTRA_DEBUG_SYNC_THREAD, +#endif /* UNIV_DEBUG */ OPT_XTRA_COMPACT, OPT_XTRA_REBUILD_INDEXES, OPT_XTRA_REBUILD_THREADS, @@ -1559,6 +1566,18 @@ Disable with --skip-innodb-checksums.", "Debug sync point. This is only used by the xtrabackup test suite", (G_PTR *)&xtrabackup_debug_sync, (G_PTR *)&xtrabackup_debug_sync, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + +#ifdef UNIV_DEBUG + {"debug-sync-thread", OPT_XTRA_DEBUG_SYNC_THREAD, + "Thread sleeps at sync point and creates a filename with the " + "sync_point_name_. There can be multiple threads" + "Delete the file to resume the thread. This is only used by the " + "xtrabackup test suite", + (G_PTR *)&xtrabackup_debug_sync_thread, + (G_PTR *)&xtrabackup_debug_sync_thread, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, + 0, 0, 0}, +#endif /* UNIV_DEBUG */ + #endif #ifndef NDEBUG @@ -1658,6 +1677,37 @@ static void sigcont_handler(int sig); static void sigcont_handler(int sig __attribute__((unused))) { debug_sync_resumed = 1; } + +#ifdef UNIV_DEBUG +static void sigusr1_handler(int sig __attribute__((unused))) { + xb::info() << "SIGUSR1 received. Reading debug_sync point from " + "xb_debug_sync_thread file in backup directory"; + + std::string debug_sync_file = + std::string(xtrabackup_target_dir) + "/xb_debug_sync_thread"; + std::ifstream ifs(debug_sync_file); + + if (ifs.fail()) { + xb::warn() << "SIGUSR1 signal sent but the xb_debug_sync_thread file with" + "a debug sync point name in it is not present at " + << xtrabackup_target_dir; + return; + } + + debug_sync_file_content.assign((std::istreambuf_iterator(ifs)), + (std::istreambuf_iterator())); + debug_sync_file_content.erase( + std::remove(debug_sync_file_content.begin(), + debug_sync_file_content.end(), '\n'), + debug_sync_file_content.cend()); + *const_cast(&xtrabackup_debug_sync_thread) = + debug_sync_file_content.c_str(); + + xb::info() << "DEBUG_SYNC_THREAD: Deleting file" << debug_sync_file; + os_file_delete(0, debug_sync_file.c_str()); +} +#endif /* UNIV_DEBUG */ + #endif void debug_sync_point(const char *name) { @@ -1701,6 +1751,52 @@ void debug_sync_point(const char *name) { #endif } +#ifdef UNIV_DEBUG +void debug_sync_thread(const char *name) { +#ifndef __WIN__ + FILE *fp; + char pid_path[FN_REFLEN]; + + if (xtrabackup_debug_sync_thread == nullptr) { + return; + } + + if (strcmp(xtrabackup_debug_sync_thread, name)) { + return; + } + + auto thread_id = std::hash{}(std::this_thread::get_id()); + + snprintf(pid_path, sizeof(pid_path), "%s/%s_%lu", xtrabackup_target_dir, name, + thread_id); + + fp = fopen(pid_path, "w"); + if (fp == NULL) { + xb::error() << "cannot open " << pid_path; + exit(EXIT_FAILURE); + } + + fclose(fp); + + while (1) { + xb::info() << "DEBUG_SYNC_THREAD: sleeping 1sec. Resume this thread by " + "deleting file " + << pid_path; + sleep(1); + bool exists = os_file_exists(pid_path); + if (!exists) break; + } + + xb::info() << "DEBUG_SYNC_THREAD: thread " << thread_id + << " resumed from sync point: " << name; + + *const_cast(&xtrabackup_debug_sync_thread) = nullptr; + +#endif +} + +#endif /* UNIV_DEBUG */ + static const char *xb_client_default_groups[] = {"xtrabackup", "client", 0, 0, 0}; @@ -4221,6 +4317,8 @@ void xtrabackup_backup_func(void) { exit(EXIT_FAILURE); } + debug_sync_thread("before_file_copy"); + /* Create data copying threads */ data_threads = (data_thread_ctxt_t *)ut::malloc_withkey( UT_NEW_THIS_FILE_PSI_KEY, @@ -7605,6 +7703,9 @@ int main(int argc, char **argv) { #ifndef __WIN__ signal(SIGCONT, sigcont_handler); +#ifdef UNIV_DEBUG + signal(SIGUSR1, sigusr1_handler); +#endif /* UNIV_DEBUG */ #endif /* --backup */ diff --git a/storage/innobase/xtrabackup/test/inc/common.sh b/storage/innobase/xtrabackup/test/inc/common.sh index cea3bdefd60e..dfe0c304787b 100644 --- a/storage/innobase/xtrabackup/test/inc/common.sh +++ b/storage/innobase/xtrabackup/test/inc/common.sh @@ -28,6 +28,30 @@ function xtrabackup() fi } +function xtrabackup_background() { + # Check if XB_ERROR_LOG is set + if [ -z "${XB_ERROR_LOG+x}" ]; then + echo "Error: XB_ERROR_LOG variable is not set. Please set XB_ERROR_LOG to the desired error log file path." >&2 + return 1 + fi + + # Check if the number of arguments passed is correct + if [ $# -lt 1 ]; then + echo "Usage: xtrabackup_background [args...]" >&2 + return 1 + fi + + local args="$@" # Combine all arguments to form the xtrabackup command + + # Execute xtrabackup in the background using the run_cmd_background function and store the PID in XB_PID + run_cmd_background $XB_BIN $XB_ARGS $args + XB_PID=$! + + # Optionally, return the PID of the background process + echo $XB_PID +} + + function rr_xtrabackup() { run_cmd rr $XB_BIN $XB_ARGS "$@" @@ -135,6 +159,32 @@ function run_cmd_expect_failure() fi } +function run_cmd_background() { + # Check if XB_ERROR_LOG is set + if [ -z "${XB_ERROR_LOG+x}" ]; then + echo "Error: XB_ERROR_LOG variable is not set. Please set XB_ERROR_LOG to the desired error log file path." >&2 + return 1 + fi + + # Check if the number of arguments passed is correct + if [ $# -lt 1 ]; then + echo "Usage: run_cmd_background [args...]" >&2 + return 1 + fi + + local cmd="$@" # Combine all arguments to form the command + + # Execute the command in the background, piping stdout to tee to duplicate the output to both the error log file and stdout + # Dont use PIPE here as the PID will be pid of pipe + $cmd > >(tee -a "$XB_ERROR_LOG") 2>&1 & + + # Capture the PID of the background process and store it in JOB_ID + CMD_PID=$! + + # Optionally, return the PID of the command + echo $CMD_PID +} + function load_sakila() { vlog "Loading sakila" @@ -1462,5 +1512,53 @@ function wait_for_file_to_generated() { done } +function wait_for_debug_sync_thread() +{ + # Check if the number of arguments passed is correct + if [ $# -lt 1 ]; then + echo "Usage: wait_for_debug_sync_thread " + return 1 + fi + + local sync_point=$1 + if [ -z "${XB_ERROR_LOG+x}" ]; then + echo "Error: XB_ERROR_LOG variable is not set. Please set XB_ERROR_LOG to the desired error log file path." >&2 + return 1 + fi + + while ! egrep -q "DEBUG_SYNC_THREAD: sleeping 1sec. Resume this thread by deleting file.*$sync_point.*" ${XB_ERROR_LOG} ; do + sleep 1 + vlog "waiting for debug_sync_thread $sync_point in $XB_ERROR_LOG" + done +} + +function resume_debug_sync_thread() +{ + # Check if the number of arguments passed is correct + if [ $# -lt 2 ]; then + echo "Usage: resume_debug_sync_thread " + return 1 + fi + + local sync_point=$1 + local backup_dir=$2 + + if [ -z "${XB_ERROR_LOG+x}" ]; then + echo "Error: XB_ERROR_LOG variable is not set. Please set XB_ERROR_LOG to the desired error log file path." >&2 + return 1 + fi + + echo "Resume from sync point: $sync_point" + rm $backup_dir/"$sync_point"_* + + echo "wait for resumed signal of $sync_point" + + while ! egrep -q "DEBUG_SYNC_THREAD: thread .* resumed from sync point: $sync_point" ${XB_ERROR_LOG} ; do + sleep 1 + vlog "waiting for debug_sync_thread point resume $sync_point in pxb $XB_ERROR_LOG" + done + +} + # To avoid unbound variable error when no server have been started SRV_MYSQLD_IDS=