Skip to content

Commit

Permalink
PXB-3113 : Improve debug sync framework to allow PXB to pause and res…
Browse files Browse the repository at this point in the history
…ume threads

https://perconadev.atlassian.net/browse/PXB-3113

The current debug-sync option in PXB completely suspends PXB process and user can resume by sending SIGCONT signal
This is useful for scenarios where PXB is paused and do certain operations on server and then resume PXB to complete.

But many bugs we found during testing, involves multiple threads in PXB. The goal of this work is to be able to
pause and resume the thread.

Since many tests use the existing debug-sync option, I dont want to disturb these tests. We can convert them to
the new mechanism later.

How to use?
-----------
The new mechanism is used with option --debug-sync-thread="sync_point_name"

In the code place a debug_sync_thread(“debug_point_1”) to stop thread at this place.

You can pass the debug_sync point via commandline --debug-sync-thread=”debug_sync_point1”

PXB will create a file of the debug_sync point name in the backup directory. It is suffixed with a threadnumber.
Please ensure that no two debug_sync points use same name (it doesn’t make sense to have two sync points with same name)

```
2024-03-28T15:58:23.310386-00:00 0 [Note] [MY-011825] [Xtrabackup] DEBUG_SYNC_THREAD: sleeping 1sec.  Resume this thread by deleting file /home/satya/WORK/pxb/bld/backup//xb_before_file_copy_4860396430306702017
```
In the test, after activating syncpoint, you can use wait_for_debug_sync_thread_point <syncpoint_name>

Do some stuff now. This thread is sleeping.

Once you are done, and if you want the thread to resume, you can do so by deleting the file 'rm backup_dir/sync_point_name_*`
Please use resume_debug_sync_thread_point <syncpoint_name> <backup_dir>. It dletes the syncpoint file and additionally checks that syncpoint is
indeed resumed.

More common/complicated scenario:
----------------------------------
The scenario is to signal another thread to stop after reaching the first sync point. To achieve this. Do steps 1 to 3 (above)

Echo the debug_sync point name into a file named “xb_debug_sync_thread”. Example:

4. echo "xtrabackup_copy_logfile_pause" > backup/xb_debug_sync_thread

5. send SIGUSR1 signal to PXB process. kill -SIGUSR1 496102

6. Wait for syncpoint to be reached. wait_for_debug_sync_thread <syncpoint_name>

PXB acknowledges it
2024-03-28T16:05:07.849926-00:00 0 [Note] [MY-011825] [Xtrabackup] SIGUSR1 received. Reading debug_sync point from xb_debug_sync_thread file in backup directory
2024-03-28T16:05:07.850004-00:00 0 [Note] [MY-011825] [Xtrabackup] DEBUG_SYNC_THREAD: Deleting  file/home/satya/WORK/pxb/bld/backup//xb_debug_sync_thread

and then prints this once the sync point is reached.
2024-03-28T16:05:08.508830-00:00 1 [Note] [MY-011825] [Xtrabackup] DEBUG_SYNC_THREAD: sleeping 1sec.  Resume this thread by deleting file /home/satya/WORK/pxb/bld/backup//xb_xtrabackup_copy_logfile_pause_10389933572825668634

At this point, we have two threads sleeping at two sync points. Either of them can be resumed by deleting the filenames mentioned in the error log.
(Or use resume_debug_sync_thread())
  • Loading branch information
satya-bodapati committed Apr 3, 2024
1 parent db75067 commit 7364061
Show file tree
Hide file tree
Showing 4 changed files with 205 additions and 0 deletions.
5 changes: 5 additions & 0 deletions storage/innobase/include/xb0xb.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ const std::string KEYRING_NOT_LOADED =
@param[in] name sync point name */
void debug_sync_point(const char *name);

/** Pause xtrabackup thread and wait for resume.
Thread can be resumed by deleting the sync_point filename
@param[in] name sync point name */
void debug_sync_thread(const char *name);

extern char *xtrabackup_debug_sync;

#endif
1 change: 1 addition & 0 deletions storage/innobase/xtrabackup/src/redo_log.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1207,6 +1207,7 @@ void Redo_Log_Data_Manager::copy_func() {
xb::info() << ">> log scanned up to (" << reader.get_scanned_lsn() << ")";

debug_sync_point("xtrabackup_copy_logfile_pause");
debug_sync_thread("xtrabackup_copy_logfile_pause");

os_event_reset(event);
os_event_wait_time_low(event, std::chrono::milliseconds{copy_interval},
Expand Down
101 changes: 101 additions & 0 deletions storage/innobase/xtrabackup/src/xtrabackup.cc
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,10 @@ it every INNOBASE_WAKE_INTERVAL'th step. */
ulong innobase_active_counter = 0;

char *xtrabackup_debug_sync = NULL;
#ifdef UNIV_DEBUG
char *xtrabackup_debug_sync_thread = NULL;
static std::string debug_sync_file_content;
#endif /* UNIV_DEBUG */
static const char *dbug_setting = nullptr;

bool xtrabackup_incremental_force_scan = false;
Expand Down Expand Up @@ -685,6 +689,9 @@ enum options_xtrabackup {
OPT_INNODB_REDO_LOG_ENCRYPT,
OPT_INNODB_UNDO_LOG_ENCRYPT,
OPT_XTRA_DEBUG_SYNC,
#ifdef UNIV_DEBUG
OPT_XTRA_DEBUG_SYNC_THREAD,
#endif /* UNIV_DEBUG */
OPT_XTRA_COMPACT,
OPT_XTRA_REBUILD_INDEXES,
OPT_XTRA_REBUILD_THREADS,
Expand Down Expand Up @@ -1559,6 +1566,18 @@ Disable with --skip-innodb-checksums.",
"Debug sync point. This is only used by the xtrabackup test suite",
(G_PTR *)&xtrabackup_debug_sync, (G_PTR *)&xtrabackup_debug_sync, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},

#ifdef UNIV_DEBUG
{"debug-sync-thread", OPT_XTRA_DEBUG_SYNC_THREAD,
"Thread sleeps at sync point and creates a filename with the "
"sync_point_name_<thread_number>. There can be multiple threads"
"Delete the file to resume the thread. This is only used by the "
"xtrabackup test suite",
(G_PTR *)&xtrabackup_debug_sync_thread,
(G_PTR *)&xtrabackup_debug_sync_thread, 0, GET_STR, REQUIRED_ARG, 0, 0, 0,
0, 0, 0},
#endif /* UNIV_DEBUG */

#endif

#ifndef NDEBUG
Expand Down Expand Up @@ -1658,6 +1677,37 @@ static void sigcont_handler(int sig);
static void sigcont_handler(int sig __attribute__((unused))) {
debug_sync_resumed = 1;
}

#ifdef UNIV_DEBUG
static void sigusr1_handler(int sig __attribute__((unused))) {
xb::info() << "SIGUSR1 received. Reading debug_sync point from "
"xb_debug_sync_thread file in backup directory";

std::string debug_sync_file =
std::string(xtrabackup_target_dir) + "/xb_debug_sync_thread";
std::ifstream ifs(debug_sync_file);

if (ifs.fail()) {
xb::warn() << "SIGUSR1 signal sent but the xb_debug_sync_thread file with"
"a debug sync point name in it is not present at "
<< xtrabackup_target_dir;
return;
}

debug_sync_file_content.assign((std::istreambuf_iterator<char>(ifs)),
(std::istreambuf_iterator<char>()));
debug_sync_file_content.erase(
std::remove(debug_sync_file_content.begin(),
debug_sync_file_content.end(), '\n'),
debug_sync_file_content.cend());
*const_cast<const char **>(&xtrabackup_debug_sync_thread) =
debug_sync_file_content.c_str();

xb::info() << "DEBUG_SYNC_THREAD: Deleting file" << debug_sync_file;
os_file_delete(0, debug_sync_file.c_str());
}
#endif /* UNIV_DEBUG */

#endif

void debug_sync_point(const char *name) {
Expand Down Expand Up @@ -1701,6 +1751,52 @@ void debug_sync_point(const char *name) {
#endif
}

#ifdef UNIV_DEBUG
void debug_sync_thread(const char *name) {
#ifndef __WIN__
FILE *fp;
char pid_path[FN_REFLEN];

if (xtrabackup_debug_sync_thread == nullptr) {
return;
}

if (strcmp(xtrabackup_debug_sync_thread, name)) {
return;
}

auto thread_id = std::hash<std::thread::id>{}(std::this_thread::get_id());

snprintf(pid_path, sizeof(pid_path), "%s/%s_%lu", xtrabackup_target_dir, name,
thread_id);

fp = fopen(pid_path, "w");
if (fp == NULL) {
xb::error() << "cannot open " << pid_path;
exit(EXIT_FAILURE);
}

fclose(fp);

while (1) {
xb::info() << "DEBUG_SYNC_THREAD: sleeping 1sec. Resume this thread by "
"deleting file "
<< pid_path;
sleep(1);
bool exists = os_file_exists(pid_path);
if (!exists) break;
}

xb::info() << "DEBUG_SYNC_THREAD: thread " << thread_id
<< " resumed from sync point: " << name;

*const_cast<const char **>(&xtrabackup_debug_sync_thread) = nullptr;

#endif
}

#endif /* UNIV_DEBUG */

static const char *xb_client_default_groups[] = {"xtrabackup", "client", 0, 0,
0};

Expand Down Expand Up @@ -4221,6 +4317,8 @@ void xtrabackup_backup_func(void) {
exit(EXIT_FAILURE);
}

debug_sync_thread("before_file_copy");

/* Create data copying threads */
data_threads = (data_thread_ctxt_t *)ut::malloc_withkey(
UT_NEW_THIS_FILE_PSI_KEY,
Expand Down Expand Up @@ -7605,6 +7703,9 @@ int main(int argc, char **argv) {

#ifndef __WIN__
signal(SIGCONT, sigcont_handler);
#ifdef UNIV_DEBUG
signal(SIGUSR1, sigusr1_handler);
#endif /* UNIV_DEBUG */
#endif

/* --backup */
Expand Down
98 changes: 98 additions & 0 deletions storage/innobase/xtrabackup/test/inc/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,30 @@ function xtrabackup()
fi
}

function xtrabackup_background() {
# Check if XB_ERROR_LOG is set
if [ -z "${XB_ERROR_LOG+x}" ]; then
echo "Error: XB_ERROR_LOG variable is not set. Please set XB_ERROR_LOG to the desired error log file path." >&2
return 1
fi

# Check if the number of arguments passed is correct
if [ $# -lt 1 ]; then
echo "Usage: xtrabackup_background [args...]" >&2
return 1
fi

local args="$@" # Combine all arguments to form the xtrabackup command

# Execute xtrabackup in the background using the run_cmd_background function and store the PID in XB_PID
run_cmd_background $XB_BIN $XB_ARGS $args
XB_PID=$!

# Optionally, return the PID of the background process
echo $XB_PID
}


function rr_xtrabackup()
{
run_cmd rr $XB_BIN $XB_ARGS "$@"
Expand Down Expand Up @@ -135,6 +159,32 @@ function run_cmd_expect_failure()
fi
}

function run_cmd_background() {
# Check if XB_ERROR_LOG is set
if [ -z "${XB_ERROR_LOG+x}" ]; then
echo "Error: XB_ERROR_LOG variable is not set. Please set XB_ERROR_LOG to the desired error log file path." >&2
return 1
fi

# Check if the number of arguments passed is correct
if [ $# -lt 1 ]; then
echo "Usage: run_cmd_background <command> [args...]" >&2
return 1
fi

local cmd="$@" # Combine all arguments to form the command

# Execute the command in the background, piping stdout to tee to duplicate the output to both the error log file and stdout
# Dont use PIPE here as the PID will be pid of pipe
$cmd > >(tee -a "$XB_ERROR_LOG") 2>&1 &

# Capture the PID of the background process and store it in JOB_ID
CMD_PID=$!

# Optionally, return the PID of the command
echo $CMD_PID
}

function load_sakila()
{
vlog "Loading sakila"
Expand Down Expand Up @@ -1462,5 +1512,53 @@ function wait_for_file_to_generated() {
done
}

function wait_for_debug_sync_new_point()
{
# Check if the number of arguments passed is correct
if [ $# -lt 1 ]; then
echo "Usage: wait_for_debug_sync_new_point <debug_sync_point_name>"
return 1
fi

local sync_point=$1
if [ -z "${XB_ERROR_LOG+x}" ]; then
echo "Error: XB_ERROR_LOG variable is not set. Please set XB_ERROR_LOG to the desired error log file path." >&2
return 1
fi

while ! egrep -q "DEBUG_SYNC_NEW: sleeping 1sec. Resume this thread by deleting file.*$sync_point.*" ${XB_ERROR_LOG} ; do
sleep 1
vlog "waiting for debug_sync_new point $sync_point in $XB_ERROR_LOG"
done
}

function resume_debug_sync_new_point()
{
# Check if the number of arguments passed is correct
if [ $# -lt 2 ]; then
echo "Usage: resume_debug_sync_new_point <debug_sync_point_name> <backup_dir>"
return 1
fi

local sync_point=$1
local backup_dir=$2

if [ -z "${XB_ERROR_LOG+x}" ]; then
echo "Error: XB_ERROR_LOG variable is not set. Please set XB_ERROR_LOG to the desired error log file path." >&2
return 1
fi

echo "Resume from sync point: $sync_point"
rm $backup_dir/"$sync_point"_*

echo "wait for resumed signal of $sync_point"

while ! egrep -q "DEBUG_SYNC_NEW: thread .* resumed from sync point: $sync_point" ${XB_ERROR_LOG} ; do
sleep 1
vlog "waiting for debug_sync_new point resume $sync_point in pxb $XB_ERROR_LOG"
done

}

# To avoid unbound variable error when no server have been started
SRV_MYSQLD_IDS=

0 comments on commit 7364061

Please sign in to comment.