Skip to content

Commit

Permalink
Don't block forever in find-new-repl-db
Browse files Browse the repository at this point in the history
Handle out-of-order on physreps gracefully
Re-verify reverse-connection / registry

Signed-off-by: Mark Hannum <[email protected]>
  • Loading branch information
markhannum committed Oct 1, 2024
1 parent f79e2c5 commit 1223343
Show file tree
Hide file tree
Showing 14 changed files with 772 additions and 338 deletions.
14 changes: 12 additions & 2 deletions bdb/rep.c
Original file line number Diff line number Diff line change
Expand Up @@ -4191,13 +4191,23 @@ static int process_berkdb(bdb_state_type *bdb_state, char *host, DBT *control, D
}

int gbl_force_incoherent = 0;
int gbl_force_incoherent_master = 0;
int gbl_ignore_coherency = 0;

static int bdb_am_i_coherent_int(bdb_state_type *bdb_state)
{
/* master can't be incoherent, unless force_incoherent_master is set */
if (bdb_amimaster(bdb_state))
if (bdb_amimaster(bdb_state)) {
if (gbl_force_incoherent_master) {
static time_t lastpr = 0;
time_t now = time(NULL);
if (now - lastpr) {
logmsg(LOGMSG_WARN, "%s returning INCOHERENT on force_incoherent_master\n", __func__);
lastpr = now;
}
return 0;
}
return 1;
}

/* force_incoherent overrides ignore_coherency */
if (gbl_force_incoherent) {
Expand Down
36 changes: 20 additions & 16 deletions berkdb/rep/rep_record.c
Original file line number Diff line number Diff line change
Expand Up @@ -2987,6 +2987,7 @@ static inline void repdb_dequeue(DBT *control_dbt, DBT *rec_dbt)
}

__thread int disable_random_deadlocks = 0;
__thread int physrep_out_of_order = 0;

/*
* __rep_apply --
Expand Down Expand Up @@ -3125,23 +3126,26 @@ __rep_apply_int(dbenv, rp, rec, ret_lsnp, commit_gen, decoupled)
* That said, I really don't want to do db operations holding the
* log mutex, so the synchronization here is tricky.
*/
/* TODO: return a message telling the physical replicant to go
* into matching mode */
if (gbl_is_physical_replicant && cmp != 0)
if (gbl_is_physical_replicant)
{
static uint32_t count=0;
count++;
if (gbl_physrep_debug == 1) {
logmsg(LOGMSG_USER, "%s out-of-order lsn [%d][%d] instead of [%d][%d], count %u\n",
__func__, rp->lsn.file, rp->lsn.offset, lp->ready_lsn.file,
lp->ready_lsn.offset, count);
}
/* A master node in a physical replication cluster would not
* have the ability to 'ask' for missing log records.
*/
if (F_ISSET(rep, REP_F_MASTER)) {
goto done;
}
if(cmp != 0) {
static uint32_t count=0;
count++;
physrep_out_of_order = 1;
if (gbl_physrep_debug == 1) {
logmsg(LOGMSG_USER, "%s out-of-order lsn [%d][%d] instead of [%d][%d], count %u\n",
__func__, rp->lsn.file, rp->lsn.offset, lp->ready_lsn.file,
lp->ready_lsn.offset, count);
}
/* A master node in a physical replication cluster would not
* have the ability to 'ask' for missing log records.
*/
if (F_ISSET(rep, REP_F_MASTER)) {
goto done;
}
} else {
physrep_out_of_order = 0;
}
}

if (cmp == 0) {
Expand Down
3 changes: 2 additions & 1 deletion db/comdb2.c
Original file line number Diff line number Diff line change
Expand Up @@ -1713,7 +1713,8 @@ void clean_exit(void)
bdb_exiting(thedb->static_table.handle);

stop_threads(thedb);
physrep_cleanup();
if (!gbl_exit)
physrep_cleanup();
flush_db();
if (gbl_backend_opened)
llmeta_dump_mapping(thedb);
Expand Down
14 changes: 14 additions & 0 deletions db/db_tunables.c
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ extern int gbl_last_locked_seqnum;
extern int gbl_set_coherent_state_trace;
extern int gbl_incoherent_slow_inactive_timeout;
extern int gbl_force_incoherent;
extern int gbl_force_incoherent_master;
extern int gbl_ignore_coherency;
extern int gbl_skip_catchup_logic;
extern int gbl_debug_downgrade_cluster_at_open;
Expand Down Expand Up @@ -453,6 +454,10 @@ extern int gbl_fdb_io_error_retries_phase_2_poll;
extern int gbl_fdb_auth_enabled;
extern int gbl_debug_invalid_genid;

/* Tranlog */
extern int gbl_tranlog_incoherent_timeout;
extern int gbl_tranlog_maxpoll;

/* Physical replication */
extern int gbl_blocking_physrep;
extern int gbl_physrep_check_minlog_freq_sec;
Expand All @@ -461,6 +466,8 @@ extern int gbl_physrep_exit_on_invalid_logstream;
extern int gbl_physrep_fanout;
extern int gbl_physrep_hung_replicant_check_freq_sec;
extern int gbl_physrep_hung_replicant_threshold;
extern int gbl_physrep_revconn_check_interval;
extern int gbl_physrep_update_registry_interval;
extern int gbl_physrep_i_am_metadb;
extern int gbl_physrep_keepalive_freq_sec;
extern int gbl_physrep_max_candidates;
Expand All @@ -470,11 +477,18 @@ extern int gbl_physrep_register_interval;
extern int gbl_physrep_shuffle_host_list;
extern int gbl_physrep_ignore_queues;

/* source-name / host is from lrl */
extern char *gbl_physrep_source_dbname;
extern char *gbl_physrep_source_host;

/* meta-name / host is from lrl */
extern char *gbl_physrep_metadb_name;
extern char *gbl_physrep_metadb_host;

/* repl-name / host is the active connection */
extern char *gbl_physrep_repl_name;
extern char *gbl_physrep_repl_host;

/* Reversql connection/sql */
extern int gbl_revsql_allow_command_exec;
extern int gbl_revsql_debug;
Expand Down
63 changes: 32 additions & 31 deletions db/db_tunables.h
Original file line number Diff line number Diff line change
Expand Up @@ -1766,13 +1766,14 @@ REGISTER_TUNABLE("blocking_physrep",
"Physical replicant blocks on select. (Default: false)",
TUNABLE_BOOLEAN, &gbl_blocking_physrep, 0, NULL, NULL, NULL,
NULL);
REGISTER_TUNABLE("physrep_check_minlog_freq_sec",
"Check the minimum log number to keep this often. (Default: 10)",
TUNABLE_INTEGER, &gbl_physrep_check_minlog_freq_sec, 0, NULL,
NULL, NULL, NULL);
REGISTER_TUNABLE("physrep_debug",
"Print extended physrep trace. (Default: off)",
TUNABLE_BOOLEAN, &gbl_physrep_debug, 0, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("tranlog_incoherent_timeout", "Timeout in seconds for incoherent tranlog. (Default: 10)",
TUNABLE_INTEGER, &gbl_tranlog_incoherent_timeout, 0, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("tranlog_maxpoll", "Tranlog timeout in seconds for blocking poll. (Default: 60)", TUNABLE_INTEGER,
&gbl_tranlog_maxpoll, 0, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("physrep_check_minlog_freq_sec", "Check the minimum log number to keep this often. (Default: 10)",
TUNABLE_INTEGER, &gbl_physrep_check_minlog_freq_sec, 0, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("physrep_debug", "Print extended physrep trace. (Default: off)", TUNABLE_BOOLEAN, &gbl_physrep_debug,
0, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("physrep_exit_on_invalid_logstream", "Exit physreps on invalid logstream. (Default: off)",
TUNABLE_BOOLEAN, &gbl_physrep_exit_on_invalid_logstream, 0, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("physrep_fanout",
Expand All @@ -1786,6 +1787,10 @@ REGISTER_TUNABLE("physrep_hung_replicant_threshold",
"Report if the physical replicant has been inactive for this duration. (Default: 60)",
TUNABLE_INTEGER, &gbl_physrep_hung_replicant_threshold, 0, NULL,
NULL, NULL, NULL);
REGISTER_TUNABLE("physrep_update_registry_interval", "Physrep update-registry interval. (Default: 60)", TUNABLE_INTEGER,
&gbl_physrep_update_registry_interval, 0, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("physrep_revconn_check_interval", "Physrep recheck revconn interval. (Default: 60)", TUNABLE_INTEGER,
&gbl_physrep_revconn_check_interval, 0, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("physrep_i_am_metadb", "I am physical replication metadb (Default: off)",
TUNABLE_BOOLEAN, &gbl_physrep_i_am_metadb, NOARG, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("physrep_keepalive_freq_sec",
Expand All @@ -1812,21 +1817,20 @@ REGISTER_TUNABLE("physrep_reconnect_penalty",
"Physrep wait seconds before retry to the same node. (Default: 5)",
TUNABLE_INTEGER, &gbl_physrep_reconnect_penalty, 0, NULL, NULL,
NULL, NULL);
REGISTER_TUNABLE("physrep_register_interval",
"Interval for physical replicant re-registration. (Default: 3600)",
TUNABLE_INTEGER, &gbl_physrep_register_interval, 0, NULL, NULL,
NULL, NULL);
REGISTER_TUNABLE("physrep_register_interval", "Interval for physical replicant re-registration. (Default: 600)",
TUNABLE_INTEGER, &gbl_physrep_register_interval, 0, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("physrep_shuffle_host_list",
"Shuffle the host list returned by register_replicant() "
"before connecting to the hosts. (Default: OFF)",
TUNABLE_BOOLEAN, &gbl_physrep_shuffle_host_list, 0, NULL, NULL,
NULL, NULL);
REGISTER_TUNABLE("physrep_source_dbname", "Physical replication source cluster dbname.",
TUNABLE_STRING, &gbl_physrep_source_dbname, READONLY, NULL, NULL, NULL,
NULL);
REGISTER_TUNABLE("physrep_source_host", "List of physical replication source cluster hosts.",
TUNABLE_STRING, &gbl_physrep_source_host, READONLY, NULL, NULL, NULL,
NULL);
TUNABLE_BOOLEAN, &gbl_physrep_shuffle_host_list, 0, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("physrep_repl_name", "Current physrep parent.", TUNABLE_STRING, &gbl_physrep_repl_name, READONLY, NULL,
NULL, NULL, NULL);
REGISTER_TUNABLE("physrep_repl_host", "Current physrep host.", TUNABLE_STRING, &gbl_physrep_repl_host, READONLY, NULL,
NULL, NULL, NULL);
REGISTER_TUNABLE("physrep_source_dbname", "Physical replication source cluster dbname.", TUNABLE_STRING,
&gbl_physrep_source_dbname, READONLY, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("physrep_source_host", "List of physical replication source cluster hosts.", TUNABLE_STRING,
&gbl_physrep_source_host, READONLY, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("physrep_ignore_queues", "Don't replicate queues.", TUNABLE_BOOLEAN, &gbl_physrep_ignore_queues,
READONLY, NULL, NULL, NULL, NULL);

Expand All @@ -1835,8 +1839,7 @@ REGISTER_TUNABLE("revsql_allow_command_execution",
"Allow processing and execution of command over the 'reverse connection' "
"that has come in as part of the request. This is mostly intended for "
"testing. (Default: off)",
TUNABLE_BOOLEAN, &gbl_revsql_allow_command_exec, EXPERIMENTAL | INTERNAL,
NULL, NULL, NULL, NULL);
TUNABLE_BOOLEAN, &gbl_revsql_allow_command_exec, EXPERIMENTAL | INTERNAL, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("revsql_cdb2_debug",
"Print extended reversql-sql cdb2 related trace. (Default: off)",
TUNABLE_BOOLEAN, &gbl_revsql_cdb2_debug, EXPERIMENTAL | INTERNAL,
Expand Down Expand Up @@ -1887,25 +1890,23 @@ REGISTER_TUNABLE("force_incoherent",
TUNABLE_BOOLEAN, &gbl_force_incoherent,
EXPERIMENTAL | INTERNAL, NULL, NULL, NULL, NULL);

REGISTER_TUNABLE("ignore_coherency",
"Force this node to be coherent. (Default: off)",
TUNABLE_BOOLEAN, &gbl_ignore_coherency,
EXPERIMENTAL | INTERNAL, NULL, NULL, NULL, NULL);
REGISTER_TUNABLE("force_incoherent_master", "Force master node to be incoherent. (Default: off)", TUNABLE_BOOLEAN,
&gbl_force_incoherent_master, EXPERIMENTAL | INTERNAL, NULL, NULL, NULL, NULL);

REGISTER_TUNABLE("ignore_coherency", "Force this node to be coherent. (Default: off)", TUNABLE_BOOLEAN,
&gbl_ignore_coherency, EXPERIMENTAL | INTERNAL, NULL, NULL, NULL, NULL);

REGISTER_TUNABLE("forbid_incoherent_writes",
"Prevent writes against a node which was incoherent at "
"transaction start. (Default: off)",
TUNABLE_BOOLEAN, &gbl_forbid_incoherent_writes,
EXPERIMENTAL | INTERNAL, NULL, NULL, NULL, NULL);
TUNABLE_BOOLEAN, &gbl_forbid_incoherent_writes, EXPERIMENTAL | INTERNAL, NULL, NULL, NULL, NULL);

REGISTER_TUNABLE("debug_downgrade_cluster_at_open",
"Sleep on open to allow testsuite to downgrade master. (Default: off)", TUNABLE_BOOLEAN,
&gbl_debug_downgrade_cluster_at_open, EXPERIMENTAL | INTERNAL, NULL, NULL, NULL, NULL);

REGISTER_TUNABLE("skip_catchup_logic",
"Skip initial catchup logic. (Default: off)", TUNABLE_BOOLEAN,
&gbl_skip_catchup_logic, EXPERIMENTAL | INTERNAL, NULL, NULL,
NULL, NULL);
REGISTER_TUNABLE("skip_catchup_logic", "Skip initial catchup logic. (Default: off)", TUNABLE_BOOLEAN,
&gbl_skip_catchup_logic, EXPERIMENTAL | INTERNAL, NULL, NULL, NULL, NULL);

REGISTER_TUNABLE("sample_queries", "Sample queries and query plans to table comdb2_sample_queries. (Default: on)",
TUNABLE_BOOLEAN, &gbl_sample_queries, 0, NULL, NULL, NULL, NULL);
Expand Down
Loading

0 comments on commit 1223343

Please sign in to comment.