Skip to content

Commit

Permalink
vtorc: Clean up more unused code
Browse files Browse the repository at this point in the history
This goes a bit further than the previous cleanup, which relied only on
`deadcode`, and digs deeper into which parts are actually unused. We
don't seem to use any of the binary log file positioning logic, since
everything uses GTID-based logic, so that can all be cleaned up.

This then removes a bunch of other unused fields etc. as well.

Lastly, we rename the data_center attribute to cell since that's the
internal Vitess terminology.

Signed-off-by: Dirkjan Bussink <[email protected]>
  • Loading branch information
dbussink committed Apr 2, 2024
1 parent e55897b commit 2b14f17
Show file tree
Hide file tree
Showing 18 changed files with 101 additions and 806 deletions.
11 changes: 0 additions & 11 deletions go/test/endtoend/vtorc/readtopologyinstance/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ limitations under the License.
package readtopologyinstance

import (
"fmt"
"os"
"testing"
"time"
Expand Down Expand Up @@ -86,8 +85,6 @@ func TestReadTopologyInstanceBufferable(t *testing.T) {
assert.Equal(t, "ROW", primaryInstance.BinlogFormat)
assert.Equal(t, "ON", primaryInstance.GTIDMode)
assert.Equal(t, "FULL", primaryInstance.BinlogRowImage)
assert.Contains(t, primaryInstance.SelfBinlogCoordinates.LogFile, fmt.Sprintf("vt-0000000%d-bin", primary.TabletUID))
assert.Greater(t, primaryInstance.SelfBinlogCoordinates.LogPos, uint32(0))
assert.True(t, primaryInstance.SemiSyncPrimaryEnabled)
assert.True(t, primaryInstance.SemiSyncReplicaEnabled)
assert.True(t, primaryInstance.SemiSyncPrimaryStatus)
Expand Down Expand Up @@ -136,8 +133,6 @@ func TestReadTopologyInstanceBufferable(t *testing.T) {
assert.Equal(t, "FULL", replicaInstance.BinlogRowImage)
assert.Equal(t, utils.Hostname, replicaInstance.SourceHost)
assert.Equal(t, primary.MySQLPort, replicaInstance.SourcePort)
assert.Contains(t, replicaInstance.SelfBinlogCoordinates.LogFile, fmt.Sprintf("vt-0000000%d-bin", replica.TabletUID))
assert.Greater(t, replicaInstance.SelfBinlogCoordinates.LogPos, uint32(0))
assert.False(t, replicaInstance.SemiSyncPrimaryEnabled)
assert.True(t, replicaInstance.SemiSyncReplicaEnabled)
assert.False(t, replicaInstance.SemiSyncPrimaryStatus)
Expand All @@ -154,12 +149,6 @@ func TestReadTopologyInstanceBufferable(t *testing.T) {
assert.Equal(t, replicaInstance.ReplicationSQLThreadState, inst.ReplicationThreadStateRunning)
assert.True(t, replicaInstance.ReplicationIOThreadRuning)
assert.True(t, replicaInstance.ReplicationSQLThreadRuning)
assert.Equal(t, replicaInstance.ReadBinlogCoordinates.LogFile, primaryInstance.SelfBinlogCoordinates.LogFile)
assert.Greater(t, replicaInstance.ReadBinlogCoordinates.LogPos, uint32(0))
assert.Equal(t, replicaInstance.ExecBinlogCoordinates.LogFile, primaryInstance.SelfBinlogCoordinates.LogFile)
assert.Greater(t, replicaInstance.ExecBinlogCoordinates.LogPos, uint32(0))
assert.Contains(t, replicaInstance.RelaylogCoordinates.LogFile, fmt.Sprintf("vt-0000000%d-relay", replica.TabletUID))
assert.Greater(t, replicaInstance.RelaylogCoordinates.LogPos, uint32(0))
assert.Empty(t, replicaInstance.LastIOError)
assert.Empty(t, replicaInstance.LastSQLError)
assert.EqualValues(t, 0, replicaInstance.SQLDelay)
Expand Down
56 changes: 28 additions & 28 deletions go/vt/vtorc/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,20 +91,20 @@ func RegisterFlags(fs *pflag.FlagSet) {
// strictly expected from user.
// TODO(sougou): change this to yaml parsing, and possible merge with tabletenv.
type Configuration struct {
SQLite3DataFile string // full path to sqlite3 datafile
InstancePollSeconds uint // Number of seconds between instance reads
SnapshotTopologiesIntervalHours uint // Interval in hour between snapshot-topologies invocation. Default: 0 (disabled)
ReasonableReplicationLagSeconds int // Above this value is considered a problem
AuditLogFile string // Name of log file for audit operations. Disabled when empty.
AuditToSyslog bool // If true, audit messages are written to syslog
AuditToBackendDB bool // If true, audit messages are written to the backend DB's `audit` table (default: true)
AuditPurgeDays uint // Days after which audit entries are purged from the database
RecoveryPeriodBlockSeconds int // (overrides `RecoveryPeriodBlockMinutes`) The time for which an instance's recovery is kept "active", so as to avoid concurrent recoveries on the same instance as well as flapping
PreventCrossDataCenterPrimaryFailover bool // When true (default: false), cross-DC primary failover are not allowed, vtorc will do all it can to only fail over within same DC, or else not fail over at all.
WaitReplicasTimeoutSeconds int // Timeout on amount of time to wait for the replicas in case of ERS. Should be a small value because we should fail-fast. Should not be larger than LockTimeout since that is the total time we use for an ERS.
TolerableReplicationLagSeconds int // Amount of replication lag that is considered acceptable for a tablet to be eligible for promotion when Vitess makes the choice of a new primary in PRS.
TopoInformationRefreshSeconds int // Timer duration on which VTOrc refreshes the keyspace and vttablet records from the topo-server.
RecoveryPollSeconds int // Timer duration on which VTOrc recovery analysis runs
SQLite3DataFile string // full path to sqlite3 datafile
InstancePollSeconds uint // Number of seconds between instance reads
SnapshotTopologiesIntervalHours uint // Interval in hour between snapshot-topologies invocation. Default: 0 (disabled)
ReasonableReplicationLagSeconds int // Above this value is considered a problem
AuditLogFile string // Name of log file for audit operations. Disabled when empty.
AuditToSyslog bool // If true, audit messages are written to syslog
AuditToBackendDB bool // If true, audit messages are written to the backend DB's `audit` table (default: true)
AuditPurgeDays uint // Days after which audit entries are purged from the database
RecoveryPeriodBlockSeconds int // (overrides `RecoveryPeriodBlockMinutes`) The time for which an instance's recovery is kept "active", so as to avoid concurrent recoveries on the same instance as well as flapping
PreventCrossCellPrimaryFailover bool // When true (default: false), cross-DC primary failover are not allowed, vtorc will do all it can to only fail over within same DC, or else not fail over at all.
WaitReplicasTimeoutSeconds int // Timeout on amount of time to wait for the replicas in case of ERS. Should be a small value because we should fail-fast. Should not be larger than LockTimeout since that is the total time we use for an ERS.
TolerableReplicationLagSeconds int // Amount of replication lag that is considered acceptable for a tablet to be eligible for promotion when Vitess makes the choice of a new primary in PRS.
TopoInformationRefreshSeconds int // Timer duration on which VTOrc refreshes the keyspace and vttablet records from the topo-server.
RecoveryPollSeconds int // Timer duration on which VTOrc recovery analysis runs
}

// ToJSONString will marshal this configuration as JSON
Expand All @@ -130,7 +130,7 @@ func UpdateConfigValuesFromFlags() {
Config.AuditToSyslog = auditToSyslog
Config.AuditPurgeDays = uint(auditPurgeDuration / (time.Hour * 24))
Config.RecoveryPeriodBlockSeconds = int(recoveryPeriodBlockDuration / time.Second)
Config.PreventCrossDataCenterPrimaryFailover = preventCrossCellFailover
Config.PreventCrossCellPrimaryFailover = preventCrossCellFailover
Config.WaitReplicasTimeoutSeconds = int(waitReplicasTimeout / time.Second)
Config.TolerableReplicationLagSeconds = int(tolerableReplicationLag / time.Second)
Config.TopoInformationRefreshSeconds = int(topoInformationRefreshDuration / time.Second)
Expand Down Expand Up @@ -165,19 +165,19 @@ func LogConfigValues() {

func newConfiguration() *Configuration {
return &Configuration{
SQLite3DataFile: "file::memory:?mode=memory&cache=shared",
InstancePollSeconds: 5,
SnapshotTopologiesIntervalHours: 0,
ReasonableReplicationLagSeconds: 10,
AuditLogFile: "",
AuditToSyslog: false,
AuditToBackendDB: false,
AuditPurgeDays: 7,
RecoveryPeriodBlockSeconds: 30,
PreventCrossDataCenterPrimaryFailover: false,
WaitReplicasTimeoutSeconds: 30,
TopoInformationRefreshSeconds: 15,
RecoveryPollSeconds: 1,
SQLite3DataFile: "file::memory:?mode=memory&cache=shared",
InstancePollSeconds: 5,
SnapshotTopologiesIntervalHours: 0,
ReasonableReplicationLagSeconds: 10,
AuditLogFile: "",
AuditToSyslog: false,
AuditToBackendDB: false,
AuditPurgeDays: 7,
RecoveryPeriodBlockSeconds: 30,
PreventCrossCellPrimaryFailover: false,
WaitReplicasTimeoutSeconds: 30,
TopoInformationRefreshSeconds: 15,
RecoveryPollSeconds: 1,
}
}

Expand Down
2 changes: 1 addition & 1 deletion go/vt/vtorc/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ func TestUpdateConfigValuesFromFlags(t *testing.T) {
}()

testConfig := newConfiguration()
testConfig.PreventCrossDataCenterPrimaryFailover = true
testConfig.PreventCrossCellPrimaryFailover = true
UpdateConfigValuesFromFlags()
require.Equal(t, testConfig, Config)
})
Expand Down
31 changes: 1 addition & 30 deletions go/vt/vtorc/db/generate_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ var TableNames = []string{
"vtorc_db_deployments",
"global_recovery_disable",
"topology_recovery_steps",
"database_instance_stale_binlog_coordinates",
"vitess_tablet",
"vitess_keyspace",
"vitess_shard",
Expand All @@ -54,31 +53,19 @@ CREATE TABLE database_instance (
binlog_format varchar(16) NOT NULL,
log_bin tinyint NOT NULL,
log_replica_updates tinyint NOT NULL,
binary_log_file varchar(128) NOT NULL,
binary_log_pos bigint NOT NULL,
source_host varchar(128) NOT NULL,
source_port smallint NOT NULL,
replica_sql_running tinyint NOT NULL,
replica_io_running tinyint NOT NULL,
source_log_file varchar(128) NOT NULL,
read_source_log_pos bigint NOT NULL,
relay_source_log_file varchar(128) NOT NULL,
exec_source_log_pos bigint NOT NULL,
replication_lag_seconds bigint DEFAULT NULL,
replica_lag_seconds bigint DEFAULT NULL,
read_only TINYint not null default 0,
last_sql_error TEXT not null default '',
last_io_error TEXT not null default '',
oracle_gtid TINYint not null default 0,
mariadb_gtid TINYint not null default 0,
relay_log_file varchar(128) not null default '',
relay_log_pos bigint not null default 0,
pseudo_gtid TINYint not null default 0,
replication_depth TINYint not null default 0,
has_replication_filters TINYint not null default 0,
data_center varchar(32) not null default '',
physical_environment varchar(32) not null default '',
is_co_primary TINYint not null default 0,
cell varchar(32) not null default '',
sql_delay int not null default 0,
binlog_server TINYint not null default 0,
supports_oracle_gtid TINYint not null default 0,
Expand All @@ -88,7 +75,6 @@ CREATE TABLE database_instance (
gtid_purged text not null default '',
has_replication_credentials TINYint not null default 0,
allow_tls TINYint not null default 0,
semi_sync_enforced TINYint not null default 0,
version_comment varchar(128) NOT NULL DEFAULT '',
major_version varchar(16) not null default '',
binlog_row_image varchar(16) not null default '',
Expand All @@ -102,7 +88,6 @@ CREATE TABLE database_instance (
ancestry_uuid text not null default '',
replication_sql_thread_state tinyint signed not null default 0,
replication_io_thread_state tinyint signed not null default 0,
region varchar(32) not null default '',
semi_sync_primary_timeout int NOT NULL DEFAULT 0,
semi_sync_primary_wait_for_replica_count int NOT NULL DEFAULT 0,
semi_sync_primary_status TINYint NOT NULL DEFAULT 0,
Expand Down Expand Up @@ -307,20 +292,6 @@ CREATE TABLE topology_recovery_steps (
PRIMARY KEY (recovery_step_id)
)`,
`
DROP TABLE IF EXISTS database_instance_stale_binlog_coordinates
`,
`
CREATE TABLE database_instance_stale_binlog_coordinates (
alias varchar(256) NOT NULL,
binary_log_file varchar(128) NOT NULL,
binary_log_pos bigint NOT NULL,
first_seen timestamp not null default (''),
PRIMARY KEY (alias)
)`,
`
CREATE INDEX first_seen_idx_database_instance_stale_binlog_coordinates ON database_instance_stale_binlog_coordinates (first_seen)
`,
`
DROP TABLE IF EXISTS vitess_tablet
`,
`
Expand Down
28 changes: 0 additions & 28 deletions go/vt/vtorc/inst/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,34 +83,19 @@ type ReplicationAnalysisHints struct {
AuditAnalysis bool
}

type AnalysisInstanceType string

const (
AnalysisInstanceTypePrimary AnalysisInstanceType = "primary"
AnalysisInstanceTypeCoPrimary AnalysisInstanceType = "co-primary"
AnalysisInstanceTypeIntermediatePrimary AnalysisInstanceType = "intermediate-primary"
)

// ReplicationAnalysis notes analysis on replication chain status, per instance
type ReplicationAnalysis struct {
AnalyzedInstanceHostname string
AnalyzedInstancePort int
AnalyzedInstanceAlias string
AnalyzedInstancePrimaryAlias string
TabletType topodatapb.TabletType
PrimaryTimeStamp time.Time
ClusterDetails ClusterInfo
AnalyzedInstanceDataCenter string
AnalyzedInstanceRegion string
AnalyzedKeyspace string
AnalyzedShard string
// ShardPrimaryTermTimestamp is the primary term start time stored in the shard record.
ShardPrimaryTermTimestamp string
AnalyzedInstancePhysicalEnvironment string
AnalyzedInstanceBinlogCoordinates BinlogCoordinates
IsPrimary bool
IsClusterPrimary bool
IsCoPrimary bool
LastCheckValid bool
LastCheckPartialSuccess bool
CountReplicas uint
Expand All @@ -126,7 +111,6 @@ type ReplicationAnalysis struct {
StructureAnalysis []StructureAnalysisCode
IsBinlogServer bool
OracleGTIDImmediateTopology bool
MariaDBGTIDImmediateTopology bool
BinlogServerImmediateTopology bool
SemiSyncPrimaryEnabled bool
SemiSyncPrimaryStatus bool
Expand Down Expand Up @@ -159,18 +143,6 @@ func (replicationAnalysis *ReplicationAnalysis) MarshalJSON() ([]byte, error) {
return json.Marshal(i)
}

// Get a string description of the analyzed instance type (primary? co-primary? intermediate-primary?)
func (replicationAnalysis *ReplicationAnalysis) GetAnalysisInstanceType() AnalysisInstanceType {
if replicationAnalysis.IsCoPrimary {
return AnalysisInstanceTypeCoPrimary
}

if replicationAnalysis.IsPrimary {
return AnalysisInstanceTypePrimary
}
return AnalysisInstanceTypeIntermediatePrimary
}

// ValidSecondsFromSeenToLastAttemptedCheck returns the maximum allowed elapsed time
// between last_attempted_check to last_checked before we consider the instance as invalid.
func ValidSecondsFromSeenToLastAttemptedCheck() uint {
Expand Down
Loading

0 comments on commit 2b14f17

Please sign in to comment.