From ff537a49f27b911e7424dd7448d59a96666521ee Mon Sep 17 00:00:00 2001 From: "daniel.esponda" Date: Tue, 4 Feb 2020 11:55:37 -0600 Subject: [PATCH 1/3] Adding jitter basic code --- chaoskube/chaoskube.go | 3 ++- chaoskube/chaoskube_test.go | 2 +- main.go | 8 ++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/chaoskube/chaoskube.go b/chaoskube/chaoskube.go index ce302d65..7f9b794a 100644 --- a/chaoskube/chaoskube.go +++ b/chaoskube/chaoskube.go @@ -123,8 +123,9 @@ func New(client kubernetes.Interface, labels, annotations, namespaces, namespace // Run continuously picks and terminates a victim pod at a given interval // described by channel next. It returns when the given context is canceled. -func (c *Chaoskube) Run(ctx context.Context, next <-chan time.Time) { +func (c *Chaoskube) Run(ctx context.Context, jitter time.Duration, next <-chan time.Time) { for { + time.Sleep(jitter) if err := c.TerminateVictims(); err != nil { c.Logger.WithField("err", err).Error("failed to terminate victim") metrics.ErrorsTotal.Inc() diff --git a/chaoskube/chaoskube_test.go b/chaoskube/chaoskube_test.go index c0fda563..b8f44254 100644 --- a/chaoskube/chaoskube_test.go +++ b/chaoskube/chaoskube_test.go @@ -124,7 +124,7 @@ func (suite *Suite) TestRunContextCanceled() { ctx, cancel := context.WithCancel(context.Background()) cancel() - chaoskube.Run(ctx, nil) + chaoskube.Run(ctx, time.Duration(0), nil) } // TestCandidates tests that the various pod filters are applied correctly. diff --git a/main.go b/main.go index 31407614..8dfa2fba 100644 --- a/main.go +++ b/main.go @@ -15,9 +15,9 @@ import ( "syscall" "time" - "gopkg.in/alecthomas/kingpin.v2" "github.com/prometheus/client_golang/prometheus/promhttp" log "github.com/sirupsen/logrus" + "gopkg.in/alecthomas/kingpin.v2" "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/kubernetes" @@ -51,6 +51,7 @@ var ( master string kubeconfig string interval time.Duration + jitter time.Duration dryRun bool debug bool metricsAddress string @@ -79,6 +80,7 @@ func init() { kingpin.Flag("master", "The address of the Kubernetes cluster to target").StringVar(&master) kingpin.Flag("kubeconfig", "Path to a kubeconfig file").StringVar(&kubeconfig) kingpin.Flag("interval", "Interval between Pod terminations").Default("10m").DurationVar(&interval) + kingpin.Flag("jitter", "The max duration of jitter to add or substract from the interval").Default("0s").DurationVar(&jitter) kingpin.Flag("dry-run", "Don't actually kill any pod. Turned on by default. Turn off with `--no-dry-run`.").Default("true").BoolVar(&dryRun) kingpin.Flag("debug", "Enable debug logging.").BoolVar(&debug) kingpin.Flag("metrics-address", "Listening address for metrics handler").Default(":8080").StringVar(&metricsAddress) @@ -121,6 +123,7 @@ func main() { "master": master, "kubeconfig": kubeconfig, "interval": interval, + "jitter": jitter, "dryRun": dryRun, "debug": debug, "metricsAddress": metricsAddress, @@ -133,6 +136,7 @@ func main() { "version": version, "dryRun": dryRun, "interval": interval, + "jitter": jitter, }).Info("starting up") client, err := newClient() @@ -235,7 +239,7 @@ func main() { ticker := time.NewTicker(interval) defer ticker.Stop() - chaoskube.Run(ctx, ticker.C) + chaoskube.Run(ctx, jitter, ticker.C) } func newClient() (*kubernetes.Clientset, error) { From 4be775002dcc398c960cbc3ded283db23cc3e11c Mon Sep 17 00:00:00 2001 From: "daniel.esponda" Date: Tue, 4 Feb 2020 12:46:00 -0600 Subject: [PATCH 2/3] Adding randomness to the jitter, utility function and test, and updating documentation --- README.md | 1 + chaoskube/chaoskube.go | 3 ++- main.go | 23 +++++++++++++++-------- util/util.go | 10 ++++++++++ util/util_test.go | 6 ++++++ 5 files changed, 34 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 59f8f7ef..cf6e31dd 100644 --- a/README.md +++ b/README.md @@ -183,6 +183,7 @@ Use `UTC`, `Local` or pick a timezone name from the [(IANA) tz database](https:/ | Option | Description | Default | |---------------------------|----------------------------------------------------------------------|----------------------------| | `--interval` | interval between pod terminations | 10m | +| `--max-jitter` | max jitter to add to the interval | 0s | | `--labels` | label selector to filter pods by | (matches everything) | | `--annotations` | annotation selector to filter pods by | (matches everything) | | `--namespaces` | namespace selector to filter pods by | (all namespaces) | diff --git a/chaoskube/chaoskube.go b/chaoskube/chaoskube.go index 7f9b794a..7d885bfa 100644 --- a/chaoskube/chaoskube.go +++ b/chaoskube/chaoskube.go @@ -123,8 +123,9 @@ func New(client kubernetes.Interface, labels, annotations, namespaces, namespace // Run continuously picks and terminates a victim pod at a given interval // described by channel next. It returns when the given context is canceled. -func (c *Chaoskube) Run(ctx context.Context, jitter time.Duration, next <-chan time.Time) { +func (c *Chaoskube) Run(ctx context.Context, maxJitter time.Duration, next <-chan time.Time) { for { + jitter := util.RandomJitter(maxJitter) time.Sleep(jitter) if err := c.TerminateVictims(); err != nil { c.Logger.WithField("err", err).Error("failed to terminate victim") diff --git a/main.go b/main.go index 8dfa2fba..54d53d87 100644 --- a/main.go +++ b/main.go @@ -51,7 +51,7 @@ var ( master string kubeconfig string interval time.Duration - jitter time.Duration + maxJitter time.Duration dryRun bool debug bool metricsAddress string @@ -80,7 +80,7 @@ func init() { kingpin.Flag("master", "The address of the Kubernetes cluster to target").StringVar(&master) kingpin.Flag("kubeconfig", "Path to a kubeconfig file").StringVar(&kubeconfig) kingpin.Flag("interval", "Interval between Pod terminations").Default("10m").DurationVar(&interval) - kingpin.Flag("jitter", "The max duration of jitter to add or substract from the interval").Default("0s").DurationVar(&jitter) + kingpin.Flag("max-jitter", "The max duration of jitter to add to the interval").Default("0s").DurationVar(&maxJitter) kingpin.Flag("dry-run", "Don't actually kill any pod. Turned on by default. Turn off with `--no-dry-run`.").Default("true").BoolVar(&dryRun) kingpin.Flag("debug", "Enable debug logging.").BoolVar(&debug) kingpin.Flag("metrics-address", "Listening address for metrics handler").Default(":8080").StringVar(&metricsAddress) @@ -123,7 +123,7 @@ func main() { "master": master, "kubeconfig": kubeconfig, "interval": interval, - "jitter": jitter, + "maxJitter": maxJitter, "dryRun": dryRun, "debug": debug, "metricsAddress": metricsAddress, @@ -133,10 +133,10 @@ func main() { }).Debug("reading config") log.WithFields(log.Fields{ - "version": version, - "dryRun": dryRun, - "interval": interval, - "jitter": jitter, + "version": version, + "dryRun": dryRun, + "interval": interval, + "maxJitter": maxJitter, }).Info("starting up") client, err := newClient() @@ -162,6 +162,13 @@ func main() { "maxKill": maxKill, }).Info("setting pod filter") + if interval <= maxJitter { + log.WithFields(log.Fields{ + "interval": interval, + "maxJitter": maxJitter, + }).Fatal("maxJitter must be less than interval") + } + parsedWeekdays := util.ParseWeekdays(excludedWeekdays) parsedTimesOfDay, err := util.ParseTimePeriods(excludedTimesOfDay) if err != nil { @@ -239,7 +246,7 @@ func main() { ticker := time.NewTicker(interval) defer ticker.Stop() - chaoskube.Run(ctx, jitter, ticker.C) + chaoskube.Run(ctx, maxJitter, ticker.C) } func newClient() (*kubernetes.Clientset, error) { diff --git a/util/util.go b/util/util.go index e0170288..ca62117f 100644 --- a/util/util.go +++ b/util/util.go @@ -195,3 +195,13 @@ func RandomPodSubSlice(pods []v1.Pod, count int) []v1.Pod { res := pods[0:count] return res } + +// RandomJitter returns a random jitter based off maxJitter +func RandomJitter(maxJitter time.Duration) time.Duration { + if maxJitter.Nanoseconds() == 0 { + return time.Duration(0) + } + seed := rand.NewSource(time.Now().UnixNano()) + rng := rand.New(seed) + return time.Duration(rng.Int63n(maxJitter.Nanoseconds())) +} diff --git a/util/util_test.go b/util/util_test.go index 98e073de..c18b7205 100644 --- a/util/util_test.go +++ b/util/util_test.go @@ -431,6 +431,12 @@ func (suite *Suite) TestRandomPodSublice() { } } +func (suite *Suite) TestRandomJitter() { + zeroJitter := time.Duration(0) + randomJitter := RandomJitter(zeroJitter) + suite.Equal(int64(0), randomJitter.Nanoseconds()) +} + func TestSuite(t *testing.T) { suite.Run(t, new(Suite)) } From 1d54e0f785e70f5e47e12be6be305472ae0c0688 Mon Sep 17 00:00:00 2001 From: "daniel.esponda" Date: Tue, 4 Feb 2020 12:51:36 -0600 Subject: [PATCH 3/3] Fix spelling error --- util/util.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/util.go b/util/util.go index ca62117f..b9bbc033 100644 --- a/util/util.go +++ b/util/util.go @@ -196,7 +196,7 @@ func RandomPodSubSlice(pods []v1.Pod, count int) []v1.Pod { return res } -// RandomJitter returns a random jitter based off maxJitter +// RandomJitter returns a random jitter based of maxJitter func RandomJitter(maxJitter time.Duration) time.Duration { if maxJitter.Nanoseconds() == 0 { return time.Duration(0)