From 5a322a4795723082207b007ab09b878889e00162 Mon Sep 17 00:00:00 2001 From: Silvio Moioli Date: Wed, 31 May 2023 11:45:05 +0200 Subject: [PATCH 1/5] leaderelection: configure all timeouts via environment variables Signed-off-by: Silvio Moioli --- pkg/leader/leader.go | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/pkg/leader/leader.go b/pkg/leader/leader.go index 8d9f1849..23a9f934 100644 --- a/pkg/leader/leader.go +++ b/pkg/leader/leader.go @@ -2,6 +2,7 @@ package leader import ( "context" + "fmt" "os" "time" @@ -13,6 +14,13 @@ import ( type Callback func(cb context.Context) +const defaultLeaseDuration = 45 * time.Second +const defaultRenewDeadline = 30 * time.Second +const defaultRetryPeriod = 2 * time.Second + +const developmentLeaseDuration = 45 * time.Hour +const developmentRenewDeadline = 30 * time.Hour + func RunOrDie(ctx context.Context, namespace, name string, client kubernetes.Interface, cb Callback) { if namespace == "" { namespace = "kube-system" @@ -43,16 +51,37 @@ func run(ctx context.Context, namespace, name string, client kubernetes.Interfac logrus.Fatalf("error creating leader lock for %s: %v", name, err) } - t := time.Second - if dl := os.Getenv("CATTLE_DEV_MODE"); dl != "" { - t = time.Hour + leaseDuration := defaultLeaseDuration + renewDeadline := defaultRenewDeadline + retryPeriod := defaultRetryPeriod + if d := os.Getenv("CATTLE_DEV_MODE"); d != "" { + leaseDuration = developmentLeaseDuration + renewDeadline = developmentRenewDeadline + } + if d := os.Getenv("CATTLE_ELECTION_LEASE_DURATION"); d != "" { + leaseDuration, err = time.ParseDuration(d) + if err != nil { + return fmt.Errorf("CATTLE_ELECTION_LEASE_DURATION value [%s] is not a valid duration: %w", d, err) + } + } + if d := os.Getenv("CATTLE_ELECTION_RENEW_DEADLINE"); d != "" { + renewDeadline, err = time.ParseDuration(d) + if err != nil { + return fmt.Errorf("CATTLE_ELECTION_RENEW_DEADLINE value [%s] is not a valid duration: %w", d, err) + } + } + if d := os.Getenv("CATTLE_ELECTION_RETRY_PERIOD"); d != "" { + retryPeriod, err = time.ParseDuration(d) + if err != nil { + return fmt.Errorf("CATTLE_ELECTION_RETRY_PERIOD value [%s] is not a valid duration: %w", d, err) + } } leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{ Lock: rl, - LeaseDuration: 45 * t, - RenewDeadline: 30 * t, - RetryPeriod: 2 * time.Second, + LeaseDuration: leaseDuration, + RenewDeadline: renewDeadline, + RetryPeriod: retryPeriod, Callbacks: leaderelection.LeaderCallbacks{ OnStartedLeading: func(ctx context.Context) { go cb(ctx) From d6f57f5eff31e2d65d2b6e6fb41db5637273bc8b Mon Sep 17 00:00:00 2001 From: Silvio Moioli Date: Mon, 28 Aug 2023 11:38:01 +0200 Subject: [PATCH 2/5] leaderelection: refactoring: extract timeout configuration setting into separate function Signed-off-by: Silvio Moioli --- pkg/leader/leader.go | 82 +++++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 35 deletions(-) diff --git a/pkg/leader/leader.go b/pkg/leader/leader.go index 23a9f934..d04fbf5d 100644 --- a/pkg/leader/leader.go +++ b/pkg/leader/leader.go @@ -51,9 +51,46 @@ func run(ctx context.Context, namespace, name string, client kubernetes.Interfac logrus.Fatalf("error creating leader lock for %s: %v", name, err) } + cbs := leaderelection.LeaderCallbacks{ + OnStartedLeading: func(ctx context.Context) { + go cb(ctx) + }, + OnStoppedLeading: func() { + select { + case <-ctx.Done(): + // The context has been canceled or is otherwise complete. + // This is a request to terminate. Exit 0. + // Exiting cleanly is useful when the context is canceled + // so that Kubernetes doesn't record it exiting in error + // when the exit was requested. For example, the wrangler-cli + // package sets up a context that cancels when SIGTERM is + // sent in. If a node is shut down this is the type of signal + // sent. In that case you want the 0 exit code to mark it as + // complete so that everything comes back up correctly after + // a restart. + // The pattern found here can be found inside the kube-scheduler. + logrus.Info("requested to terminate, exiting") + os.Exit(0) + default: + logrus.Fatalf("leaderelection lost for %s", name) + } + }, + } + + config, err := computeConfig(rl, cbs) + if err != nil { + return err + } + + leaderelection.RunOrDie(ctx, *config) + panic("unreachable") +} + +func computeConfig(rl resourcelock.Interface, cbs leaderelection.LeaderCallbacks) (*leaderelection.LeaderElectionConfig, error) { leaseDuration := defaultLeaseDuration renewDeadline := defaultRenewDeadline retryPeriod := defaultRetryPeriod + var err error if d := os.Getenv("CATTLE_DEV_MODE"); d != "" { leaseDuration = developmentLeaseDuration renewDeadline = developmentRenewDeadline @@ -61,53 +98,28 @@ func run(ctx context.Context, namespace, name string, client kubernetes.Interfac if d := os.Getenv("CATTLE_ELECTION_LEASE_DURATION"); d != "" { leaseDuration, err = time.ParseDuration(d) if err != nil { - return fmt.Errorf("CATTLE_ELECTION_LEASE_DURATION value [%s] is not a valid duration: %w", d, err) + return nil, fmt.Errorf("CATTLE_ELECTION_LEASE_DURATION value [%s] is not a valid duration: %w", d, err) } } if d := os.Getenv("CATTLE_ELECTION_RENEW_DEADLINE"); d != "" { renewDeadline, err = time.ParseDuration(d) if err != nil { - return fmt.Errorf("CATTLE_ELECTION_RENEW_DEADLINE value [%s] is not a valid duration: %w", d, err) + return nil, fmt.Errorf("CATTLE_ELECTION_RENEW_DEADLINE value [%s] is not a valid duration: %w", d, err) } } if d := os.Getenv("CATTLE_ELECTION_RETRY_PERIOD"); d != "" { retryPeriod, err = time.ParseDuration(d) if err != nil { - return fmt.Errorf("CATTLE_ELECTION_RETRY_PERIOD value [%s] is not a valid duration: %w", d, err) + return nil, fmt.Errorf("CATTLE_ELECTION_RETRY_PERIOD value [%s] is not a valid duration: %w", d, err) } } - leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{ - Lock: rl, - LeaseDuration: leaseDuration, - RenewDeadline: renewDeadline, - RetryPeriod: retryPeriod, - Callbacks: leaderelection.LeaderCallbacks{ - OnStartedLeading: func(ctx context.Context) { - go cb(ctx) - }, - OnStoppedLeading: func() { - select { - case <-ctx.Done(): - // The context has been canceled or is otherwise complete. - // This is a request to terminate. Exit 0. - // Exiting cleanly is useful when the context is canceled - // so that Kubernetes doesn't record it exiting in error - // when the exit was requested. For example, the wrangler-cli - // package sets up a context that cancels when SIGTERM is - // sent in. If a node is shut down this is the type of signal - // sent. In that case you want the 0 exit code to mark it as - // complete so that everything comes back up correctly after - // a restart. - // The pattern found here can be found inside the kube-scheduler. - logrus.Info("requested to terminate, exiting") - os.Exit(0) - default: - logrus.Fatalf("leaderelection lost for %s", name) - } - }, - }, + return &leaderelection.LeaderElectionConfig{ + Lock: rl, + LeaseDuration: leaseDuration, + RenewDeadline: renewDeadline, + RetryPeriod: retryPeriod, + Callbacks: cbs, ReleaseOnCancel: true, - }) - panic("unreachable") + }, nil } From 7c35dd2b17c17e85a5d1fdde7ccad304de503f08 Mon Sep 17 00:00:00 2001 From: Silvio Moioli Date: Mon, 28 Aug 2023 11:38:26 +0200 Subject: [PATCH 3/5] leaderelection: add unit tests for timeout setting function Signed-off-by: Silvio Moioli --- pkg/leader/leader_test.go | 149 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 pkg/leader/leader_test.go diff --git a/pkg/leader/leader_test.go b/pkg/leader/leader_test.go new file mode 100644 index 00000000..8212ef7e --- /dev/null +++ b/pkg/leader/leader_test.go @@ -0,0 +1,149 @@ +package leader + +import ( + "os" + "reflect" + "testing" + "time" + + "k8s.io/client-go/tools/leaderelection" + "k8s.io/client-go/tools/leaderelection/resourcelock" +) + +func Test_computeConfig(t *testing.T) { + type args struct { + rl resourcelock.Interface + cbs leaderelection.LeaderCallbacks + } + type env struct { + key string + value string + } + tests := []struct { + name string + args args + envs []env + want *leaderelection.LeaderElectionConfig + wantErr bool + }{ + { + name: "all defaults", + args: args{ + rl: nil, + cbs: leaderelection.LeaderCallbacks{}, + }, + envs: []env{}, + want: &leaderelection.LeaderElectionConfig{ + Lock: nil, + LeaseDuration: defaultLeaseDuration, + RenewDeadline: defaultRenewDeadline, + RetryPeriod: defaultRetryPeriod, + Callbacks: leaderelection.LeaderCallbacks{}, + ReleaseOnCancel: true, + }, + wantErr: false, + }, + { + name: "dev mode", + args: args{ + rl: nil, + cbs: leaderelection.LeaderCallbacks{}, + }, + envs: []env{ + {key: "CATTLE_DEV_MODE", value: "true"}, + }, + want: &leaderelection.LeaderElectionConfig{ + Lock: nil, + LeaseDuration: developmentLeaseDuration, + RenewDeadline: developmentRenewDeadline, + RetryPeriod: defaultRetryPeriod, + Callbacks: leaderelection.LeaderCallbacks{}, + ReleaseOnCancel: true, + }, + wantErr: false, + }, + { + name: "all overridden", + args: args{ + rl: nil, + cbs: leaderelection.LeaderCallbacks{}, + }, + envs: []env{ + {key: "CATTLE_DEV_MODE", value: "true"}, + {key: "CATTLE_ELECTION_LEASE_DURATION", value: "1s"}, + {key: "CATTLE_ELECTION_RENEW_DEADLINE", value: "2s"}, + {key: "CATTLE_ELECTION_RETRY_PERIOD", value: "3m"}, + }, + want: &leaderelection.LeaderElectionConfig{ + Lock: nil, + LeaseDuration: time.Second, + RenewDeadline: 2 * time.Second, + RetryPeriod: 3 * time.Minute, + Callbacks: leaderelection.LeaderCallbacks{}, + ReleaseOnCancel: true, + }, + wantErr: false, + }, + { + name: "unparseable lease duration", + args: args{ + rl: nil, + cbs: leaderelection.LeaderCallbacks{}, + }, + envs: []env{ + {key: "CATTLE_ELECTION_LEASE_DURATION", value: "bomb"}, + {key: "CATTLE_ELECTION_RENEW_DEADLINE", value: "2s"}, + {key: "CATTLE_ELECTION_RETRY_PERIOD", value: "3m"}, + }, + want: nil, + wantErr: true, + }, + { + name: "unparseable renew deadline", + args: args{ + rl: nil, + cbs: leaderelection.LeaderCallbacks{}, + }, + envs: []env{ + {key: "CATTLE_ELECTION_LEASE_DURATION", value: "1s"}, + {key: "CATTLE_ELECTION_RENEW_DEADLINE", value: "bomb"}, + {key: "CATTLE_ELECTION_RETRY_PERIOD", value: "3m"}, + }, + want: nil, + wantErr: true, + }, + { + name: "unparseable retry period", + args: args{ + rl: nil, + cbs: leaderelection.LeaderCallbacks{}, + }, + envs: []env{ + {key: "CATTLE_ELECTION_LEASE_DURATION", value: "1s"}, + {key: "CATTLE_ELECTION_RENEW_DEADLINE", value: "2s"}, + {key: "CATTLE_ELECTION_RETRY_PERIOD", value: "bomb"}, + }, + want: nil, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + for _, e := range tt.envs { + err := os.Setenv(e.key, e.value) + if err != nil { + t.Errorf("could not SetEnv: %v", err) + return + } + } + got, err := computeConfig(tt.args.rl, tt.args.cbs) + if (err != nil) != tt.wantErr { + t.Errorf("computeConfig() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("computeConfig() got = %v, want %v", got, tt.want) + } + }) + } +} From 05cbd3a3619decb71e8b0f9192a5b482e9df042a Mon Sep 17 00:00:00 2001 From: Silvio Moioli Date: Mon, 28 Aug 2023 16:14:15 +0200 Subject: [PATCH 4/5] leaderelection: refactoring: extract env keys as consts Signed-off-by: Silvio Moioli --- pkg/leader/leader.go | 19 ++++++++++++------- pkg/leader/leader_test.go | 28 ++++++++++++++-------------- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/pkg/leader/leader.go b/pkg/leader/leader.go index d04fbf5d..5131c359 100644 --- a/pkg/leader/leader.go +++ b/pkg/leader/leader.go @@ -14,6 +14,11 @@ import ( type Callback func(cb context.Context) +const devModeEnvKey = "CATTLE_DEV_MODE" +const leaseDurationEnvKey = "CATTLE_ELECTION_LEASE_DURATION" +const renewDeadlineEnvKey = "CATTLE_ELECTION_RENEW_DEADLINE" +const retryPeriodEnvKey = "CATTLE_ELECTION_RETRY_PERIOD" + const defaultLeaseDuration = 45 * time.Second const defaultRenewDeadline = 30 * time.Second const defaultRetryPeriod = 2 * time.Second @@ -91,26 +96,26 @@ func computeConfig(rl resourcelock.Interface, cbs leaderelection.LeaderCallbacks renewDeadline := defaultRenewDeadline retryPeriod := defaultRetryPeriod var err error - if d := os.Getenv("CATTLE_DEV_MODE"); d != "" { + if d := os.Getenv(devModeEnvKey); d != "" { leaseDuration = developmentLeaseDuration renewDeadline = developmentRenewDeadline } - if d := os.Getenv("CATTLE_ELECTION_LEASE_DURATION"); d != "" { + if d := os.Getenv(leaseDurationEnvKey); d != "" { leaseDuration, err = time.ParseDuration(d) if err != nil { - return nil, fmt.Errorf("CATTLE_ELECTION_LEASE_DURATION value [%s] is not a valid duration: %w", d, err) + return nil, fmt.Errorf("%s value [%s] is not a valid duration: %w", leaseDurationEnvKey, d, err) } } - if d := os.Getenv("CATTLE_ELECTION_RENEW_DEADLINE"); d != "" { + if d := os.Getenv(renewDeadlineEnvKey); d != "" { renewDeadline, err = time.ParseDuration(d) if err != nil { - return nil, fmt.Errorf("CATTLE_ELECTION_RENEW_DEADLINE value [%s] is not a valid duration: %w", d, err) + return nil, fmt.Errorf("%s value [%s] is not a valid duration: %w", renewDeadlineEnvKey, d, err) } } - if d := os.Getenv("CATTLE_ELECTION_RETRY_PERIOD"); d != "" { + if d := os.Getenv(retryPeriodEnvKey); d != "" { retryPeriod, err = time.ParseDuration(d) if err != nil { - return nil, fmt.Errorf("CATTLE_ELECTION_RETRY_PERIOD value [%s] is not a valid duration: %w", d, err) + return nil, fmt.Errorf("%s value [%s] is not a valid duration: %w", retryPeriodEnvKey, d, err) } } diff --git a/pkg/leader/leader_test.go b/pkg/leader/leader_test.go index 8212ef7e..c6ca7655 100644 --- a/pkg/leader/leader_test.go +++ b/pkg/leader/leader_test.go @@ -50,7 +50,7 @@ func Test_computeConfig(t *testing.T) { cbs: leaderelection.LeaderCallbacks{}, }, envs: []env{ - {key: "CATTLE_DEV_MODE", value: "true"}, + {key: devModeEnvKey, value: "true"}, }, want: &leaderelection.LeaderElectionConfig{ Lock: nil, @@ -69,10 +69,10 @@ func Test_computeConfig(t *testing.T) { cbs: leaderelection.LeaderCallbacks{}, }, envs: []env{ - {key: "CATTLE_DEV_MODE", value: "true"}, - {key: "CATTLE_ELECTION_LEASE_DURATION", value: "1s"}, - {key: "CATTLE_ELECTION_RENEW_DEADLINE", value: "2s"}, - {key: "CATTLE_ELECTION_RETRY_PERIOD", value: "3m"}, + {key: devModeEnvKey, value: "true"}, + {key: leaseDurationEnvKey, value: "1s"}, + {key: renewDeadlineEnvKey, value: "2s"}, + {key: retryPeriodEnvKey, value: "3m"}, }, want: &leaderelection.LeaderElectionConfig{ Lock: nil, @@ -91,9 +91,9 @@ func Test_computeConfig(t *testing.T) { cbs: leaderelection.LeaderCallbacks{}, }, envs: []env{ - {key: "CATTLE_ELECTION_LEASE_DURATION", value: "bomb"}, - {key: "CATTLE_ELECTION_RENEW_DEADLINE", value: "2s"}, - {key: "CATTLE_ELECTION_RETRY_PERIOD", value: "3m"}, + {key: leaseDurationEnvKey, value: "bomb"}, + {key: renewDeadlineEnvKey, value: "2s"}, + {key: retryPeriodEnvKey, value: "3m"}, }, want: nil, wantErr: true, @@ -105,9 +105,9 @@ func Test_computeConfig(t *testing.T) { cbs: leaderelection.LeaderCallbacks{}, }, envs: []env{ - {key: "CATTLE_ELECTION_LEASE_DURATION", value: "1s"}, - {key: "CATTLE_ELECTION_RENEW_DEADLINE", value: "bomb"}, - {key: "CATTLE_ELECTION_RETRY_PERIOD", value: "3m"}, + {key: leaseDurationEnvKey, value: "1s"}, + {key: renewDeadlineEnvKey, value: "bomb"}, + {key: retryPeriodEnvKey, value: "3m"}, }, want: nil, wantErr: true, @@ -119,9 +119,9 @@ func Test_computeConfig(t *testing.T) { cbs: leaderelection.LeaderCallbacks{}, }, envs: []env{ - {key: "CATTLE_ELECTION_LEASE_DURATION", value: "1s"}, - {key: "CATTLE_ELECTION_RENEW_DEADLINE", value: "2s"}, - {key: "CATTLE_ELECTION_RETRY_PERIOD", value: "bomb"}, + {key: leaseDurationEnvKey, value: "1s"}, + {key: renewDeadlineEnvKey, value: "2s"}, + {key: retryPeriodEnvKey, value: "bomb"}, }, want: nil, wantErr: true, From ef1a21e4a1e95769957ed07afe53bdaebff6e16d Mon Sep 17 00:00:00 2001 From: Silvio Moioli Date: Mon, 28 Aug 2023 16:14:29 +0200 Subject: [PATCH 5/5] make test ordering-robust Signed-off-by: Silvio Moioli --- pkg/leader/leader_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pkg/leader/leader_test.go b/pkg/leader/leader_test.go index c6ca7655..f173e423 100644 --- a/pkg/leader/leader_test.go +++ b/pkg/leader/leader_test.go @@ -129,6 +129,13 @@ func Test_computeConfig(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + for _, e := range []string{leaseDurationEnvKey, renewDeadlineEnvKey, retryPeriodEnvKey} { + err := os.Unsetenv(e) + if err != nil { + t.Errorf("could not Unsetenv: %v", err) + return + } + } for _, e := range tt.envs { err := os.Setenv(e.key, e.value) if err != nil {