From c61568ddf21784b2607431e6999ca55722e26e63 Mon Sep 17 00:00:00 2001 From: lgtm <1gtm@users.noreply.github.com> Date: Sun, 1 Oct 2023 22:20:55 -0700 Subject: [PATCH] [cherry-pick] Add stopBalancer timeout; Retry setBalancerState; Improve logging (#1929) (#1934) /cherry-pick Signed-off-by: Arnob kumar saha --- pkg/backup.go | 80 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 71 insertions(+), 9 deletions(-) diff --git a/pkg/backup.go b/pkg/backup.go index e8ba473ee..470fb8aef 100644 --- a/pkg/backup.go +++ b/pkg/backup.go @@ -607,6 +607,7 @@ func getPrimaryNSecondaryMember(mongoDSN string) (primary, secondary string, err } if secHost != primary { + klog.Infof("Primary %s & Secondary %s found for mongoDSN %s \n", primary, secHost, mongoDSN) return primary, secHost, nil } } @@ -623,10 +624,18 @@ func disabelBalancer(mongosHost string) error { "config", "--host", mongosHost, "--quiet", - "--eval", "JSON.stringify(sh.stopBalancer())", + "--eval", "JSON.stringify(sh.stopBalancer(600000,1000))", }, mongoCreds...) // disable balancer - if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil { + output, err := sh.Command(MongoCMD, args...).Output() + if err != nil { + klog.Errorf("Error while stopping balancer : %s ; output : %s \n", err.Error(), output) + return err + } + + err = json.Unmarshal(output, &v) + if err != nil { + klog.Errorf("Unmarshal error while stopping balancer : %s ; output = %s \n", err.Error(), output) return err } @@ -642,8 +651,10 @@ func disabelBalancer(mongosHost string) error { "--eval", "while(sh.isBalancerRunning()){ print('waiting for balancer to stop...'); sleep(1000);}", }, mongoCreds...) if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").Run(); err != nil { + klog.Errorf("Error while waiting for the balancer to stop : %s \n", err.Error()) return err } + klog.Info("Balancer successfully Disabled.") return nil } @@ -659,14 +670,33 @@ func enableBalancer(mongosHost string) error { "--quiet", "--eval", "JSON.stringify(sh.setBalancerState(true))", }, mongoCreds...) - if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil { + + var ( + output []byte + err error + ) + cmd := sh.Command(MongoCMD, args...) + for i := 0; i < 10; i++ { + output, err = cmd.Output() + if err != nil { + klog.Errorf("Try #%d : Error on setBalancerState command : %s, output : %s .\n", i, err.Error(), output) + time.Sleep(time.Second) + } else { + break + } + } + + err = json.Unmarshal(output, &v) + if err != nil { + klog.Errorf("Unmarshal error while enabling balancer : %+v , output : %s \n", err.Error(), output) return err } if val, ok := v["ok"].(float64); !ok || int(val) != 1 { - return fmt.Errorf("unable to disable balancer. got response: %v", v) + return fmt.Errorf("unable to enable balancer. got response: %v", v) } + klog.Info("Balancer successfully re-enabled.") return nil } @@ -685,7 +715,22 @@ func lockConfigServer(configSVRDSN, secondaryHost string) error { "--quiet", "--eval", "db.BackupControl.findAndModify({query: { _id: 'BackupControlDocument' }, update: { $inc: { counter : 1 } }, new: true, upsert: true, writeConcern: { w: 'majority', wtimeout: 15000 }});", }, mongoCreds...) - if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil { + + output, err := sh.Command(MongoCMD, args...).Output() + if err != nil { + klog.Errorf("Error while running findAndModify to lock configServer : %s ; output : %s \n", err.Error(), output) + return err + } + s := fmt.Sprintf(`/bin/echo '%s' | /usr/bin/tail -1`, strings.TrimSuffix(string(output), "\n")) + output, err = sh.Command("/bin/sh", "-c", s).Output() + if err != nil { + klog.Errorf("Error while running tail in findAndModify to lock configServer : %s ; output : %s \n", err.Error(), output) + return err + } + + err = json.Unmarshal(output, &v) + if err != nil { + klog.Errorf("Unmarshal error while running findAndModify to lock configServer : %s \n", err.Error()) return err } val, ok := v["counter"].(float64) @@ -739,14 +784,23 @@ func lockSecondaryMember(mongohost string) error { "--quiet", "--eval", "JSON.stringify(db.fsyncLock())", }, mongoCreds...) - if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil { + + output, err := sh.Command(MongoCMD, args...).Output() + if err != nil { + klog.Errorf("Error while running fsyncLock on secondary : %s ; output : %s \n", err.Error(), output) + return err + } + + err = json.Unmarshal(output, &v) + if err != nil { + klog.Errorf("Unmarshal error while running fsyncLock on secondary : %s \n", err.Error()) return err } if val, ok := v["ok"].(float64); !ok || int(val) != 1 { return fmt.Errorf("unable to lock the secondary host. got response: %v", v) } - + klog.Infof("secondary %s locked.", mongohost) return nil } @@ -765,13 +819,21 @@ func unlockSecondaryMember(mongohost string) error { "--quiet", "--eval", "JSON.stringify(db.fsyncUnlock())", }, mongoCreds...) - if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil { + + output, err := sh.Command(MongoCMD, args...).Output() + if err != nil { + klog.Errorf("Error while running fsyncUnlock on secondary : %s ; output : %s \n", err.Error(), output) + return err + } + err = json.Unmarshal(output, &v) + if err != nil { + klog.Errorf("Unmarshal error while running fsyncUnlock on secondary : %s \n", err.Error()) return err } if val, ok := v["ok"].(float64); !ok || int(val) != 1 { return fmt.Errorf("unable to lock the secondary host. got response: %v", v) } - + klog.Infof("secondary %s unlocked.", mongohost) return nil }