Kill existing connections when changing roles (#587)
Signed-off-by: Masayuki Ishii <[email protected]>
masa213f authored Nov 1, 2023
1 parent 70e2e4f commit 3bad1f5
Showing 6 changed files with 149 additions and 54 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -94,7 +94,7 @@ check-generate:
envtest: setup-envtest
source <($(SETUP_ENVTEST) use -p env); \
export MOCO_CHECK_INTERVAL=100ms; \
export MOCO_WAIT_INTERVAL=100ms; \
export MOCO_CLONE_WAIT_DURATION=100ms; \
go test -v -count 1 -race ./clustering -ginkgo.progress -ginkgo.v -ginkgo.failFast
source <($(SETUP_ENVTEST) use -p env); \
export DEBUG_CONTROLLER=1; \
28 changes: 26 additions & 2 deletions clustering/manager_test.go
@@ -540,9 +540,12 @@ var _ = Describe("manager", func() {
}
}

// confirm that connections to the mysqld instances whose role has changed are killed
for i := 0; i < 3; i++ {
switch i {
case 0: // connection in demoted instance should be killed
case 0: // KillConnections is called twice: at the start of the switchover and when the role changes.
Expect(of.getKillConnectionsCount(cluster.PodHostname(i))).To(Equal(2))
case newPrimary:
Expect(of.getKillConnectionsCount(cluster.PodHostname(i))).To(Equal(1))
default:
Expect(of.getKillConnectionsCount(cluster.PodHostname(i))).To(Equal(0))
@@ -705,7 +708,12 @@ var _ = Describe("manager", func() {
Expect(failOverEvents).To(Equal(1))

for i := 0; i < 3; i++ {
Expect(of.getKillConnectionsCount(cluster.PodHostname(i))).To(Equal(0))
switch i {
case 1:
Expect(of.getKillConnectionsCount(cluster.PodHostname(i))).To(Equal(1))
default:
Expect(of.getKillConnectionsCount(cluster.PodHostname(i))).To(Equal(0))
}
}

By("recovering failed instance")
@@ -796,6 +804,8 @@ var _ = Describe("manager", func() {
}).Should(Succeed())

By("making an errant replica")
of.resetKillConnectionsCount()

// When the primary load is high, the gtid_executed of a replica sometimes gets ahead of the primary's.
// pod(4) is intended for such situations.
testSetGTID(cluster.PodHostname(0), "p0:1,p0:2,p0:3") // primary
@@ -846,6 +856,10 @@ var _ = Describe("manager", func() {
Expect(st1.ReplicaStatus.SlaveIORunning).NotTo(Equal("Yes"))
}

for i := 0; i < 5; i++ {
Expect(of.getKillConnectionsCount(cluster.PodHostname(i))).To(Equal(0))
}

By("triggering a failover")
of.setRetrievedGTIDSet(cluster.PodHostname(2), "p0:1")
of.setRetrievedGTIDSet(cluster.PodHostname(3), "p0:1,p0:2,p0:3")
@@ -909,6 +923,7 @@ var _ = Describe("manager", func() {
Expect(failOverEvents).To(Equal(1))

By("re-initializing the errant replica")
of.resetKillConnectionsCount()
testSetGTID(cluster.PodHostname(1), "")
Eventually(func() interface{} {
return ms.errantReplicas
@@ -937,6 +952,15 @@ var _ = Describe("manager", func() {
}
}).Should(Succeed())

for i := 0; i < 5; i++ {
switch i {
case 1:
Expect(of.getKillConnectionsCount(cluster.PodHostname(i))).To(Equal(1))
default:
Expect(of.getKillConnectionsCount(cluster.PodHostname(i))).To(Equal(0))
}
}

By("stopping instances to make the cluster lost")
of.setFailing(cluster.PodHostname(3), true)
of.setFailing(cluster.PodHostname(1), true)
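Note: the tests above count KillConnections calls through a fake returned by the operator factory (of). That fake's implementation is outside this diff, so the following is only a minimal sketch of such a counting test double; every name in it (opFactory, killCount, the method set) is an assumption for illustration, and MOCO's real fake may differ.

import "sync"

// opFactory records how many times KillConnections was called per
// mysqld hostname, guarded by a mutex because status checks run in
// parallel goroutines.
type opFactory struct {
	mu        sync.Mutex
	killCount map[string]int
}

func (f *opFactory) incrementKillConnectionsCount(host string) {
	f.mu.Lock()
	defer f.mu.Unlock()
	if f.killCount == nil {
		f.killCount = make(map[string]int)
	}
	f.killCount[host]++
}

func (f *opFactory) getKillConnectionsCount(host string) int {
	f.mu.Lock()
	defer f.mu.Unlock()
	return f.killCount[host]
}

func (f *opFactory) resetKillConnectionsCount() {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.killCount = make(map[string]int)
}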
151 changes: 106 additions & 45 deletions clustering/operations.go
@@ -23,18 +23,33 @@ const (
failOverTimeoutSeconds = 3600
)

var waitForRestartDuration = 3 * time.Second
var (
waitForCloneRestartDuration = 3 * time.Second
waitForRoleChangeDuration = 300 * time.Millisecond
)

func init() {
intervalStr := os.Getenv("MOCO_CLONE_WAIT_DURATION")
if intervalStr == "" {
return
}
interval, err := time.ParseDuration(intervalStr)
if err != nil {
return
}
waitForCloneRestartDuration = interval
}

func init() {
intervalStr := os.Getenv("MOCO_WAIT_INTERVAL")
intervalStr := os.Getenv("MOCO_ROLE_WAIT_DURATION")
if intervalStr == "" {
return
}
interval, err := time.ParseDuration(intervalStr)
if err != nil {
return
}
waitForRestartDuration = interval
waitForRoleChangeDuration = interval
}

func (p *managerProcess) isCloning(ctx context.Context, ss *StatusSet) bool {
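Note: both init functions above repeat the same parse-or-fall-back pattern, and because init runs at process start-up, the variables must be exported before the binary launches (which is why the Makefile's envtest recipe sets them). A possible consolidation, sketched as a hypothetical helper rather than code from this commit:

// durationFromEnv parses the named environment variable as a duration,
// falling back to def when the variable is unset or malformed.
func durationFromEnv(key string, def time.Duration) time.Duration {
	s := os.Getenv(key)
	if s == "" {
		return def
	}
	d, err := time.ParseDuration(s)
	if err != nil {
		return def
	}
	return d
}

// The two init blocks would then reduce to a pair of assignments:
//   waitForCloneRestartDuration = durationFromEnv("MOCO_CLONE_WAIT_DURATION", 3*time.Second)
//   waitForRoleChangeDuration = durationFromEnv("MOCO_ROLE_WAIT_DURATION", 300*time.Millisecond)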
@@ -115,7 +130,7 @@ func (p *managerProcess) clone(ctx context.Context, ss *StatusSet) (bool, error)

// wait until the instance restarts after clone
op := ss.DBOps[ss.Primary]
time.Sleep(waitForRestartDuration)
time.Sleep(waitForCloneRestartDuration)
for i := 0; i < 60; i++ {
select {
case <-time.After(1 * time.Second):
@@ -249,9 +264,90 @@ func (p *managerProcess) failover(ctx context.Context, ss *StatusSet) error {
return nil
}

func (p *managerProcess) removeRoleLabel(ctx context.Context, ss *StatusSet) ([]int, error) {
var noRoles []int
for i, pod := range ss.Pods {
v := pod.Labels[constants.LabelMocoRole]
if v == "" {
noRoles = append(noRoles, i)
continue
}

if i == ss.Primary && v == constants.RolePrimary {
continue
}
if i != ss.Primary && !isErrantReplica(ss, i) && v == constants.RoleReplica {
continue
}

noRoles = append(noRoles, i)
modified := pod.DeepCopy()
delete(modified.Labels, constants.LabelMocoRole)
if err := p.client.Patch(ctx, modified, client.MergeFrom(pod)); err != nil {
return nil, fmt.Errorf("failed to remove %s label from %s/%s: %w", constants.LabelMocoRole, pod.Namespace, pod.Name, err)
}
}
return noRoles, nil
}

func (p *managerProcess) addRoleLabel(ctx context.Context, ss *StatusSet, noRoles []int) error {
for _, i := range noRoles {
if isErrantReplica(ss, i) {
continue
}

var newValue string
if i == ss.Primary {
newValue = constants.RolePrimary
} else {
newValue = constants.RoleReplica
}

pod := ss.Pods[i]
modified := pod.DeepCopy()
if modified.Labels == nil {
modified.Labels = make(map[string]string)
}
modified.Labels[constants.LabelMocoRole] = newValue
if err := p.client.Patch(ctx, modified, client.MergeFrom(pod)); err != nil {
return fmt.Errorf("failed to add %s label to pod %s/%s: %w", constants.LabelMocoRole, pod.Namespace, pod.Name, err)
}
}
return nil
}

func (p *managerProcess) configure(ctx context.Context, ss *StatusSet) (bool, error) {
redo := false

// remove the old role label from mysql pods whose role has changed
// NOTE:
// Ideally we would redo whenever even one pod is updated, to refresh the pod resources in StatusSet.
// However, if some mysql instances are down, each redo incurs a wait of about 9 seconds in "(*managerProcess).GatherStatus()".
// That wait slows the recovery process and lengthens downtime, so we continue processing without redoing.
noRoles, err := p.removeRoleLabel(ctx, ss)
if err != nil {
return false, err
}

// if the role of a live instance has changed, kill the connections on that instance
var alive []int
for _, i := range noRoles {
if ss.MySQLStatus[i] == nil || isErrantReplica(ss, i) {
continue
}
alive = append(alive, i)
}
if len(alive) > 0 {
// Sleep to give the primary and replica Services time to update their backend pods to the new roles.
time.Sleep(waitForRoleChangeDuration)
}
for _, i := range alive {
if err := ss.DBOps[i].KillConnections(ctx); err != nil {
return false, fmt.Errorf("failed to kill connections in instance %d: %w", i, err)
}
}

// configure primary instance
if ss.Cluster.Spec.ReplicationSourceSecretName != nil {
r, err := p.configureIntermediatePrimary(ctx, ss)
if err != nil {
@@ -266,6 +362,7 @@ func (p *managerProcess) configure(ctx context.Context, ss *StatusSet) (bool, error) {
redo = redo || r
}

// configure replica instances
for i, ist := range ss.MySQLStatus {
if i == ss.Primary {
continue
@@ -280,46 +377,10 @@ func (p *managerProcess) configure(ctx context.Context, ss *StatusSet) (bool, error) {
redo = redo || r
}

// update labels
for i, pod := range ss.Pods {
if i == ss.Primary {
if pod.Labels[constants.LabelMocoRole] != constants.RolePrimary {
redo = true
modified := pod.DeepCopy()
if modified.Labels == nil {
modified.Labels = make(map[string]string)
}
modified.Labels[constants.LabelMocoRole] = constants.RolePrimary
if err := p.client.Patch(ctx, modified, client.MergeFrom(pod)); err != nil {
return false, fmt.Errorf("failed to set role for pod %s/%s: %w", pod.Namespace, pod.Name, err)
}
}
continue
}

if ss.MySQLStatus[i] != nil && ss.MySQLStatus[i].IsErrant {
if _, ok := pod.Labels[constants.LabelMocoRole]; ok {
redo = true
modified := pod.DeepCopy()
delete(modified.Labels, constants.LabelMocoRole)
if err := p.client.Patch(ctx, modified, client.MergeFrom(pod)); err != nil {
return false, fmt.Errorf("failed to set role for pod %s/%s: %w", pod.Namespace, pod.Name, err)
}
}
continue
}

if pod.Labels[constants.LabelMocoRole] != constants.RoleReplica {
redo = true
modified := pod.DeepCopy()
if modified.Labels == nil {
modified.Labels = make(map[string]string)
}
modified.Labels[constants.LabelMocoRole] = constants.RoleReplica
if err := p.client.Patch(ctx, modified, client.MergeFrom(pod)); err != nil {
return false, fmt.Errorf("failed to set role for pod %s/%s: %w", pod.Namespace, pod.Name, err)
}
}
// add new role label
err = p.addRoleLabel(ctx, ss, noRoles)
if err != nil {
return false, err
}

// make the primary writable if it is not an intermediate primary
Expand Down Expand Up @@ -484,7 +545,7 @@ func (p *managerProcess) configureReplica(ctx context.Context, ss *StatusSet, in
log.Info("clone succeeded", "instance", index)

// wait until the instance restarts after clone
time.Sleep(waitForRestartDuration)
time.Sleep(waitForCloneRestartDuration)
for i := 0; i < 60; i++ {
select {
case <-time.After(1 * time.Second):
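Note: DBOps.KillConnections itself is not shown in this diff. For orientation, here is a rough sketch of how such an operation is commonly implemented against MySQL, by enumerating the processlist and killing every foreign connection; none of the identifiers below come from MOCO, and the real implementation may filter users differently.

import (
	"context"
	"database/sql"
	"fmt"
)

// killConnections terminates every client connection except our own.
func killConnections(ctx context.Context, db *sql.DB) error {
	rows, err := db.QueryContext(ctx,
		`SELECT ID FROM information_schema.PROCESSLIST
		 WHERE ID <> CONNECTION_ID() AND USER NOT IN ('event_scheduler', 'system user')`)
	if err != nil {
		return err
	}
	defer rows.Close()

	var ids []uint64
	for rows.Next() {
		var id uint64
		if err := rows.Scan(&id); err != nil {
			return err
		}
		ids = append(ids, id)
	}
	if err := rows.Err(); err != nil {
		return err
	}

	for _, id := range ids {
		// KILL CONNECTION closes the client connection. A production version
		// would likely tolerate "unknown thread id" errors from connections
		// that have already disappeared.
		if _, err := db.ExecContext(ctx, fmt.Sprintf("KILL CONNECTION %d", id)); err != nil {
			return err
		}
	}
	return nil
}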
12 changes: 10 additions & 2 deletions clustering/status.go
@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"os"
"slices"
"sort"
"strconv"
"strings"
@@ -208,10 +209,10 @@ func (p *managerProcess) GatherStatus(ctx context.Context) (*StatusSet, error) {
}
// process errors
if j == statusCheckRetryMax {
logFromContext(ctx).Error(err, "failed to get mysqld status, mysqld is not ready")
logFromContext(ctx).Error(err, "failed to get mysqld status, mysqld is not ready", "instance", index)
return
}
logFromContext(ctx).Error(err, "failed to get mysqld status, will retry")
logFromContext(ctx).Error(err, "failed to get mysqld status, will retry", "instance", index)
time.Sleep(statusCheckRetryInterval)
}
}(i)
@@ -299,6 +300,13 @@ func containErrantTransactions(primaryUUID, gtidSet string) bool {
return false
}

func isErrantReplica(ss *StatusSet, index int) bool {
if ss.MySQLStatus[index] != nil && ss.MySQLStatus[index].IsErrant {
return true
}
return slices.Contains(ss.Errants, index)
}

func isPodReady(pod *corev1.Pod) bool {
for _, cond := range pod.Status.Conditions {
if cond.Type != corev1.PodReady {
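Note: the slices package imported above is the Go standard library's (available since Go 1.21), not golang.org/x/exp/slices, so it adds no new module requirement. A minimal usage sketch of the call isErrantReplica relies on, with hypothetical values:

package main

import (
	"fmt"
	"slices"
)

func main() {
	errants := []int{1, 4} // stand-in for ss.Errants
	fmt.Println(slices.Contains(errants, 1)) // true
	fmt.Println(slices.Contains(errants, 2)) // false
}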
2 changes: 2 additions & 0 deletions go.mod
@@ -114,10 +114,12 @@ require (
golang.org/x/crypto v0.14.0 // indirect
golang.org/x/net v0.17.0 // indirect
golang.org/x/oauth2 v0.8.0 // indirect
golang.org/x/sync v0.4.0 // indirect
golang.org/x/sys v0.13.0 // indirect
golang.org/x/term v0.13.0 // indirect
golang.org/x/text v0.13.0 // indirect
golang.org/x/time v0.3.0 // indirect
golang.org/x/tools v0.14.0 // indirect
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
gomodules.xyz/jsonpatch/v2 v2.3.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
8 changes: 4 additions & 4 deletions go.sum
@@ -368,8 +368,8 @@ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.2.0 h1:PUR+T4wwASmuSTYdKjYHI5TD22Wy5ogLU5qZCOLxBrI=
golang.org/x/sync v0.2.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ=
golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -417,8 +417,8 @@ golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4f
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo=
golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc=
golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc=
golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
