From ebcb3660671774d126292c92a6a46b6560952a5f Mon Sep 17 00:00:00 2001 From: Filipe Pina <636320+fopina@users.noreply.github.com> Date: Mon, 16 Sep 2024 05:40:13 +0100 Subject: [PATCH] add support for hash scheduling (#1260) * add support for hash scheduling * docs updated --- dkron/job.go | 63 +++++++++++++++++++++++++++++++-- dkron/job_test.go | 12 +++++++ dkron/scheduler.go | 2 +- website/docs/usage/cron-spec.md | 16 +++++---- 4 files changed, 84 insertions(+), 9 deletions(-) diff --git a/dkron/job.go b/dkron/job.go index ab6f45941..a92372100 100644 --- a/dkron/job.go +++ b/dkron/job.go @@ -4,6 +4,8 @@ import ( "errors" "fmt" "regexp" + "strconv" + "strings" "time" "github.com/distribworks/dkron/v4/extcron" @@ -31,6 +33,9 @@ const ( ConcurrencyAllow = "allow" // ConcurrencyForbid forbids a job from executing concurrency. ConcurrencyForbid = "forbid" + + // HashSymbol is the "magic" character used in scheduled to be replaced with a value based on job name + HashSymbol = "~" ) var ( @@ -303,10 +308,64 @@ func (j *Job) GetTimeLocation() *time.Location { return loc } +// nameHash returns hash code of the job name +func (j *Job) nameHash() int { + hash := 0 + for _, c := range j.Name { + hash += int(c) + } + return hash +} + +// scheduleHash replaces H in the cron spec by a value derived from job Name +// such as "0 0 ~ * * *" +func (j *Job) scheduleHash() string { + spec := j.Schedule + + if !strings.Contains(spec, HashSymbol) { + return spec + } + + hash := j.nameHash() + parts := strings.Split(spec, " ") + partIndex := 0 + for index, part := range parts { + if strings.HasPrefix(part, "@") { + // this is a pre-defined scheduled, ignore everything + return spec + } + if strings.HasPrefix(part, "TZ=") || strings.HasPrefix(part, "CRON_TZ=") { + // do not increase partIndex + continue + } + + if strings.Contains(part, HashSymbol) { + // mods taken in accordance with https://dkron.io/docs/usage/cron-spec/#cron-expression-format + partHash := hash + switch partIndex { + case 2: + partHash %= 24 + case 3: + partHash = (partHash % 28) + 1 + case 4: + partHash = (partHash % 12) + 1 + case 5: + partHash %= 7 + default: + partHash %= 60 + } + parts[index] = strings.ReplaceAll(part, HashSymbol, strconv.Itoa(partHash)) + } + + partIndex++ + } + return strings.Join(parts, " ") +} + // GetNext returns the job's next schedule from now func (j *Job) GetNext() (time.Time, error) { if j.Schedule != "" { - s, err := extcron.Parse(j.Schedule) + s, err := extcron.Parse(j.scheduleHash()) if err != nil { return time.Time{}, err } @@ -367,7 +426,7 @@ func (j *Job) Validate() error { // Validate schedule, allow empty schedule if parent job set. if j.Schedule != "" || j.ParentJob == "" { - if _, err := extcron.Parse(j.Schedule); err != nil { + if _, err := extcron.Parse(j.scheduleHash()); err != nil { return fmt.Errorf("%s: %s", ErrScheduleParse.Error(), err) } } diff --git a/dkron/job_test.go b/dkron/job_test.go index 1407cc692..a144a23a4 100644 --- a/dkron/job_test.go +++ b/dkron/job_test.go @@ -192,6 +192,18 @@ func Test_isRunnable(t *testing.T) { } } +func Test_scheduleHash(t *testing.T) { + job := &Job{ + Name: "test_job", + } + job.Schedule = "0 0 ~ * * *" + assert.Equal(t, "0 0 18 * * *", job.scheduleHash()) + job.Schedule = "TZ=Europe/Madrid 0 0 1 * ~ *" + assert.Equal(t, "TZ=Europe/Madrid 0 0 1 * 7 *", job.scheduleHash()) + job.Schedule = "TZ=Europe/Madrid @at something with ~" + assert.Equal(t, "TZ=Europe/Madrid @at something with ~", job.scheduleHash()) +} + type gRPCClientMock struct { } diff --git a/dkron/scheduler.go b/dkron/scheduler.go index ab62c380b..5a17879bd 100644 --- a/dkron/scheduler.go +++ b/dkron/scheduler.go @@ -159,7 +159,7 @@ func (s *Scheduler) AddJob(job *Job) error { // If Timezone is set on the job, and not explicitly in its schedule, // AND its not a descriptor (that don't support timezones), add the // timezone to the schedule so robfig/cron knows about it. - schedule := job.Schedule + schedule := job.scheduleHash() if job.Timezone != "" && !strings.HasPrefix(schedule, "@") && !strings.HasPrefix(schedule, "TZ=") && diff --git a/website/docs/usage/cron-spec.md b/website/docs/usage/cron-spec.md index 77baf8025..13d3d2e23 100644 --- a/website/docs/usage/cron-spec.md +++ b/website/docs/usage/cron-spec.md @@ -9,12 +9,12 @@ A cron expression represents a set of times, using 6 space-separated fields. Field name | Mandatory? | Allowed values | Allowed special characters ---------- | ---------- | -------------- | -------------------------- - Seconds | Yes | 0-59 | * / , - - Minutes | Yes | 0-59 | * / , - - Hours | Yes | 0-23 | * / , - - Day of month | Yes | 1-31 | * / , - ? - Month | Yes | 1-12 or JAN-DEC | * / , - - Day of week | Yes | 0-6 or SUN-SAT | * / , - ? + Seconds | Yes | 0-59 | * / , - ~ + Minutes | Yes | 0-59 | * / , - ~ + Hours | Yes | 0-23 | * / , - ~ + Day of month | Yes | 1-31 | * / , - ? ~ + Month | Yes | 1-12 or JAN-DEC | * / , - ~ + Day of week | Yes | 0-6 or SUN-SAT | * / , - ? ~ Note: Month and Day-of-week field values are case insensitive. "SUN", "Sun", and "sun" are equally accepted. @@ -51,6 +51,10 @@ Question mark ( ? ) Question mark may be used instead of '*' for leaving either day-of-month or day-of-week blank. +Tilde ( ~ ) + +Tilde will be replaced by a numeric value valid for the range where it is used. It allows periodically scheduled tasks to produce even load on the system. For example, scheduling multiple hourly jobs to "0 ~ * * * *" rather than "0 0 * * * *" will run the jobs at different minutes of every hour. It can be thought of as a random value over a range, but it actually is a hash of the job name, not a random function, so that the value remains stable for any given job. + ### Predefined schedules You may use one of several pre-defined schedules in place of a cron expression.