Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add configurable presets for name prefixes, tags, etc. #1490

Merged
merged 29 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
e5ac74d
Initial draft of customizable transformation
lennartkats-db Jun 8, 2024
6eaee84
Refactor to one function per transformer
lennartkats-db Jun 11, 2024
601c32b
WIP
lennartkats-db Jun 11, 2024
b16c18c
Remove enabled fields
lennartkats-db Jun 12, 2024
901097a
Add config merging & test
lennartkats-db Jun 12, 2024
a815f30
e2e fixes
lennartkats-db Jun 12, 2024
13630dd
Cleanup
lennartkats-db Jun 12, 2024
e3b0435
Use PAUSED/UNPAUSED instead of a boolean
lennartkats-db Jun 12, 2024
10a1ffc
Rename to transform for now
lennartkats-db Jun 12, 2024
405e202
Merge remote-tracking branch 'databricks/main' into cp-mutator-settings
lennartkats-db Jun 12, 2024
7323d02
Cleanup
lennartkats-db Jun 14, 2024
4dc5f41
Add stricter validations
lennartkats-db Jun 14, 2024
82e1d49
Cleanup
lennartkats-db Jun 19, 2024
6d75e84
Simply mutations, no need for dyn here
lennartkats-db Jun 19, 2024
40b004e
Cleanup
lennartkats-db Jul 5, 2024
29a23cf
Merge remote-tracking branch 'databricks/main' into cp-mutator-settings
lennartkats-db Jul 6, 2024
b353a2f
Rename to presets
lennartkats-db Jul 6, 2024
f636e09
Allow tags to merge instead of override
lennartkats-db Jul 6, 2024
347e24e
Fix test
lennartkats-db Jul 6, 2024
3e003c0
Pause continuous pipelines when 'mode: development' is used
lennartkats-db Jul 11, 2024
40f3bb4
Use extension configuration
lennartkats-db Jul 19, 2024
b1427b3
Address reviewer comments, fix names
lennartkats-db Jul 19, 2024
f2553ff
Merge remote-tracking branch 'databricks/main' into cp-mutator-settings
lennartkats-db Jul 19, 2024
fb902c9
Fix regression in main
lennartkats-db Jul 19, 2024
6159c3c
Merge remote-tracking branch 'databricks/main' into cp-mutator-settings
lennartkats-db Aug 14, 2024
b4564f2
Use bundle.Apply() for tests
lennartkats-db Aug 14, 2024
70d8988
Add assertion
lennartkats-db Aug 16, 2024
a073f84
Merge remote-tracking branch 'databricks/main' into cp-mutator-settings
lennartkats-db Aug 16, 2024
b9e3278
Merge remote-tracking branch 'databricks/main' into cp-mutator-settings
lennartkats-db Aug 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 179 additions & 0 deletions bundle/config/mutator/apply_transforms.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
package mutator

import (
"context"
"path"
"sort"
"strings"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/textutil"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/databricks/databricks-sdk-go/service/ml"
)

type applyTransforms struct{}

// Apply all transforms, e.g. the prefix transform that
// adds a prefix to all names of all resources.
func ApplyTransforms() *applyTransforms {
return &applyTransforms{}
}
lennartkats-db marked this conversation as resolved.
Show resolved Hide resolved

type Tag struct {
Key string
Value string
}

func (m *applyTransforms) Name() string {
return "ApplyTransforms"
}

func (m *applyTransforms) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
diag := validatePauseStatus(b)
if diag != nil {
return diag
}

r := b.Config.Resources
t := b.Config.Transform
prefix := t.Prefix
tags := toTagArray(t.Tags)

// Jobs transforms: Prefix, Tags, JobsMaxConcurrentRuns, TriggerPauseStatus
for i := range r.Jobs {
lennartkats-db marked this conversation as resolved.
Show resolved Hide resolved
r.Jobs[i].Name = prefix + r.Jobs[i].Name
if r.Jobs[i].Tags == nil {
r.Jobs[i].Tags = make(map[string]string)
}
for _, tag := range tags {
if r.Jobs[i].Tags[tag.Key] == "" {
r.Jobs[i].Tags[tag.Key] = tag.Value
}
}
if r.Jobs[i].MaxConcurrentRuns == 0 {
r.Jobs[i].MaxConcurrentRuns = t.DefaultJobsMaxConcurrentRuns
}
if t.DefaultTriggerPauseStatus != "" {
paused := jobs.PauseStatusPaused
if t.DefaultTriggerPauseStatus == config.Unpaused {
paused = jobs.PauseStatusUnpaused
}

if r.Jobs[i].Schedule != nil && r.Jobs[i].Schedule.PauseStatus == "" {
r.Jobs[i].Schedule.PauseStatus = paused
}
if r.Jobs[i].Continuous != nil && r.Jobs[i].Continuous.PauseStatus == "" {
r.Jobs[i].Continuous.PauseStatus = paused
}
if r.Jobs[i].Trigger != nil && r.Jobs[i].Trigger.PauseStatus == "" {
r.Jobs[i].Trigger.PauseStatus = paused
}
}
}

// Pipelines transforms: Prefix, PipelinesDevelopment
for i := range r.Pipelines {
r.Pipelines[i].Name = prefix + r.Pipelines[i].Name
if config.IsExplicitlyEnabled(t.DefaultPipelinesDevelopment) {
r.Pipelines[i].Development = true
}

// As of 2024-06, pipelines don't yet support tags
}

// Models transforms: Prefix, Tags
for i := range r.Models {
r.Models[i].Name = prefix + r.Models[i].Name
for _, t := range tags {
exists := false
for _, modelTag := range r.Models[i].Tags {
if modelTag.Key == t.Key {
exists = true
break
}
}
lennartkats-db marked this conversation as resolved.
Show resolved Hide resolved
if !exists {
r.Models[i].Tags = append(r.Models[i].Tags, ml.ModelTag{Key: t.Key, Value: t.Value})
}
}
}

// Experiments transforms: Prefix, Tags
for i := range r.Experiments {
filepath := r.Experiments[i].Name
dir := path.Dir(filepath)
base := path.Base(filepath)
if dir == "." {
r.Experiments[i].Name = prefix + base
} else {
r.Experiments[i].Name = dir + "/" + prefix + base
}
for _, t := range tags {
exists := false
for _, experimentTag := range r.Experiments[i].Tags {
if experimentTag.Key == t.Key {
exists = true
break
}
}
if !exists {
r.Experiments[i].Tags = append(r.Experiments[i].Tags, ml.ExperimentTag{Key: t.Key, Value: t.Value})
}
}
}

// Model serving endpoint transforms: Prefix
for i := range r.ModelServingEndpoints {
r.ModelServingEndpoints[i].Name = normalizePrefix(prefix) + r.ModelServingEndpoints[i].Name

// As of 2024-06, model serving endpoints don't yet support tags
}

// Registered models transforms: Prefix
for i := range r.RegisteredModels {
r.RegisteredModels[i].Name = normalizePrefix(prefix) + r.RegisteredModels[i].Name

// As of 2024-06, registered models don't yet support tags
}

return nil
}

func validatePauseStatus(b *bundle.Bundle) diag.Diagnostics {
p := b.Config.Transform.DefaultTriggerPauseStatus
if p == "" || p == config.Paused || p == config.Unpaused {
return nil
}
return diag.Diagnostics{{
Summary: "Invalid value for default_trigger_pause_status, should be PAUSED or UNPAUSED",
Severity: diag.Error,
Location: b.Config.GetLocation("transform.default_trigger_pause_status"),
}}
}

// Convert a map of tags to an array of tags.
// We sort tags so we always produce a consistent list of tags.
lennartkats-db marked this conversation as resolved.
Show resolved Hide resolved
func toTagArray(tags *map[string]string) []Tag {
var tagArray []Tag
if tags == nil {
return tagArray
}
for key, value := range *tags {
tagArray = append(tagArray, Tag{Key: key, Value: value})
}
sort.Slice(tagArray, func(i, j int) bool {
return tagArray[i].Key < tagArray[j].Key
})
return tagArray
}

// Normalize prefix strings like '[dev lennart] ' to 'dev_lennart_'.
// We leave unicode letters and numbers but remove all "special characters."
func normalizePrefix(prefix string) string {
prefix = strings.ReplaceAll(prefix, "[", "")
prefix = strings.ReplaceAll(prefix, "] ", "_")
return textutil.NormalizeString(prefix)
}
219 changes: 219 additions & 0 deletions bundle/config/mutator/apply_transforms_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
package mutator_test

import (
"context"
"testing"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/mutator"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/stretchr/testify/assert"
)

func TestTransformPrefix(t *testing.T) {
lennartkats-db marked this conversation as resolved.
Show resolved Hide resolved
tests := []struct {
name string
prefix string
job *resources.Job
want string
}{
{
name: "add prefix to job",
prefix: "prefix-",
job: &resources.Job{
JobSettings: &jobs.JobSettings{
Name: "job1",
},
},
want: "prefix-job1",
},
{
name: "add empty prefix to job",
prefix: "",
job: &resources.Job{
JobSettings: &jobs.JobSettings{
Name: "job1",
},
},
want: "job1",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
b := &bundle.Bundle{
Config: config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job1": tt.job,
},
},
Transform: config.Transforms{
Prefix: tt.prefix,
},
},
}

mutator := mutator.ApplyTransforms()
diag := mutator.Apply(context.Background(), b)

if diag.HasError() {
t.Fatalf("unexpected error: %v", diag)
}

if got := b.Config.Resources.Jobs["job1"].Name; got != tt.want {
t.Errorf("got %v, want %v", got, tt.want)
}
lennartkats-db marked this conversation as resolved.
Show resolved Hide resolved
})
}
}

func TestTransformTags(t *testing.T) {
tests := []struct {
name string
tags map[string]string
job *resources.Job
want map[string]string
}{
{
name: "add tags to job",
tags: map[string]string{"env": "dev"},
job: &resources.Job{
JobSettings: &jobs.JobSettings{
Name: "job1",
Tags: nil,
},
},
want: map[string]string{"env": "dev"},
},
{
name: "merge tags with existing job tags",
tags: map[string]string{"env": "dev"},
job: &resources.Job{
JobSettings: &jobs.JobSettings{
Name: "job1",
Tags: map[string]string{"team": "data"},
},
},
want: map[string]string{"env": "dev", "team": "data"},
},
{
name: "don't override existing job tags",
tags: map[string]string{"env": "dev"},
job: &resources.Job{
JobSettings: &jobs.JobSettings{
Name: "job1",
Tags: map[string]string{"env": "prod"},
},
},
want: map[string]string{"env": "prod"},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
b := &bundle.Bundle{
Config: config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job1": tt.job,
},
},
Transform: config.Transforms{
Tags: &tt.tags,
},
},
}

mutator := mutator.ApplyTransforms()
diag := mutator.Apply(context.Background(), b)

if diag.HasError() {
t.Fatalf("unexpected error: %v", diag)
}

got := b.Config.Resources.Jobs["job1"].Tags
for k, v := range tt.want {
if got[k] != v {
t.Errorf("tag %v: got %v, want %v", k, got[k], v)
}
}
lennartkats-db marked this conversation as resolved.
Show resolved Hide resolved
})
}
}

func TestTransformJobsMaxConcurrentRuns(t *testing.T) {
tests := []struct {
name string
job *resources.Job
setting int
want int
}{
{
name: "set max concurrent runs",
job: &resources.Job{
JobSettings: &jobs.JobSettings{
Name: "job1",
MaxConcurrentRuns: 0,
},
},
setting: 5,
want: 5,
},
{
name: "do not override existing max concurrent runs",
job: &resources.Job{
JobSettings: &jobs.JobSettings{
Name: "job1",
MaxConcurrentRuns: 3,
},
},
setting: 5,
want: 3,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
b := &bundle.Bundle{
Config: config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job1": tt.job,
},
},
Transform: config.Transforms{
DefaultJobsMaxConcurrentRuns: tt.setting,
},
},
}

mutator := mutator.ApplyTransforms()
diag := mutator.Apply(context.Background(), b)

if diag.HasError() {
t.Fatalf("unexpected error: %v", diag)
}

if got := b.Config.Resources.Jobs["job1"].MaxConcurrentRuns; got != tt.want {
t.Errorf("got %v, want %v", got, tt.want)
}
})
}
}

func TestTransformValidation(t *testing.T) {
b := &bundle.Bundle{
Config: config.Root{
Transform: config.Transforms{
DefaultTriggerPauseStatus: "invalid",
},
},
}

mutator := mutator.ApplyTransforms()
diag := mutator.Apply(context.Background(), b)
assert.Equal(t, "Invalid value for default_trigger_pause_status, should be PAUSED or UNPAUSED", diag[0].Summary)
}
Loading
Loading