Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add configurable presets for name prefixes, tags, etc. #1490

Merged
merged 29 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
e5ac74d
Initial draft of customizable transformation
lennartkats-db Jun 8, 2024
6eaee84
Refactor to one function per transformer
lennartkats-db Jun 11, 2024
601c32b
WIP
lennartkats-db Jun 11, 2024
b16c18c
Remove enabled fields
lennartkats-db Jun 12, 2024
901097a
Add config merging & test
lennartkats-db Jun 12, 2024
a815f30
e2e fixes
lennartkats-db Jun 12, 2024
13630dd
Cleanup
lennartkats-db Jun 12, 2024
e3b0435
Use PAUSED/UNPAUSED instead of a boolean
lennartkats-db Jun 12, 2024
10a1ffc
Rename to transform for now
lennartkats-db Jun 12, 2024
405e202
Merge remote-tracking branch 'databricks/main' into cp-mutator-settings
lennartkats-db Jun 12, 2024
7323d02
Cleanup
lennartkats-db Jun 14, 2024
4dc5f41
Add stricter validations
lennartkats-db Jun 14, 2024
82e1d49
Cleanup
lennartkats-db Jun 19, 2024
6d75e84
Simply mutations, no need for dyn here
lennartkats-db Jun 19, 2024
40b004e
Cleanup
lennartkats-db Jul 5, 2024
29a23cf
Merge remote-tracking branch 'databricks/main' into cp-mutator-settings
lennartkats-db Jul 6, 2024
b353a2f
Rename to presets
lennartkats-db Jul 6, 2024
f636e09
Allow tags to merge instead of override
lennartkats-db Jul 6, 2024
347e24e
Fix test
lennartkats-db Jul 6, 2024
3e003c0
Pause continuous pipelines when 'mode: development' is used
lennartkats-db Jul 11, 2024
40f3bb4
Use extension configuration
lennartkats-db Jul 19, 2024
b1427b3
Address reviewer comments, fix names
lennartkats-db Jul 19, 2024
f2553ff
Merge remote-tracking branch 'databricks/main' into cp-mutator-settings
lennartkats-db Jul 19, 2024
fb902c9
Fix regression in main
lennartkats-db Jul 19, 2024
6159c3c
Merge remote-tracking branch 'databricks/main' into cp-mutator-settings
lennartkats-db Aug 14, 2024
b4564f2
Use bundle.Apply() for tests
lennartkats-db Aug 14, 2024
70d8988
Add assertion
lennartkats-db Aug 16, 2024
a073f84
Merge remote-tracking branch 'databricks/main' into cp-mutator-settings
lennartkats-db Aug 16, 2024
b9e3278
Merge remote-tracking branch 'databricks/main' into cp-mutator-settings
lennartkats-db Aug 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 209 additions & 0 deletions bundle/config/mutator/apply_presets.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
package mutator

import (
"context"
"path"
"slices"
"sort"
"strings"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/cli/libs/textutil"
"github.com/databricks/databricks-sdk-go/service/catalog"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/databricks/databricks-sdk-go/service/ml"
)

// applyPresets is the stateless mutator type returned by ApplyPresets.
type applyPresets struct{}

// ApplyPresets returns a mutator that applies all presets, e.g. the
// name prefix preset that adds a prefix to the names of all resources.
func ApplyPresets() *applyPresets {
	return new(applyPresets)
}

// Tag is a single key/value pair taken from the presets' tag map.
type Tag struct {
	Key, Value string
}

// Name returns the fixed identifier of this mutator.
func (*applyPresets) Name() string {
	const name = "ApplyPresets"
	return name
}

func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
if d := validatePauseStatus(b); d != nil {
return d
}

r := b.Config.Resources
t := b.Config.Presets
prefix := t.NamePrefix
tags := toTagArray(t.Tags)

// Jobs presets: Prefix, Tags, JobsMaxConcurrentRuns, TriggerPauseStatus
for _, j := range r.Jobs {
j.Name = prefix + j.Name
if j.Tags == nil {
j.Tags = make(map[string]string)
}
for _, tag := range tags {
if j.Tags[tag.Key] == "" {
j.Tags[tag.Key] = tag.Value
}
}
if j.MaxConcurrentRuns == 0 {
j.MaxConcurrentRuns = t.JobsMaxConcurrentRuns
}
if t.TriggerPauseStatus != "" {
paused := jobs.PauseStatusPaused
if t.TriggerPauseStatus == config.Unpaused {
paused = jobs.PauseStatusUnpaused
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could be "foobar" and be treated as paused. To improve readability you can use a switch/case on t.TriggerPauseStatus here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have a validation for the TriggerPauseStatus property above though, so we would show a fatal error if a user uses "foobar". And this way the code here can be a bit more concise.


if j.Schedule != nil && j.Schedule.PauseStatus == "" {
j.Schedule.PauseStatus = paused
}
if j.Continuous != nil && j.Continuous.PauseStatus == "" {
j.Continuous.PauseStatus = paused
}
if j.Trigger != nil && j.Trigger.PauseStatus == "" {
j.Trigger.PauseStatus = paused
}
}
}

// Pipelines presets: Prefix, PipelinesDevelopment
for i := range r.Pipelines {
r.Pipelines[i].Name = prefix + r.Pipelines[i].Name
if config.IsExplicitlyEnabled(t.PipelinesDevelopment) {
r.Pipelines[i].Development = true
}
if t.TriggerPauseStatus == config.Paused {
r.Pipelines[i].Continuous = false
}

// As of 2024-06, pipelines don't yet support tags
}

// Models presets: Prefix, Tags
for _, m := range r.Models {
m.Name = prefix + m.Name
for _, t := range tags {
exists := slices.ContainsFunc(m.Tags, func(modelTag ml.ModelTag) bool {
return modelTag.Key == t.Key
})
if !exists {
// Only add this tag if the resource didn't include any tag that overrides its value.
m.Tags = append(m.Tags, ml.ModelTag{Key: t.Key, Value: t.Value})
}
}
}

// Experiments presets: Prefix, Tags
for _, e := range r.Experiments {
filepath := e.Name
dir := path.Dir(filepath)
base := path.Base(filepath)
if dir == "." {
e.Name = prefix + base
} else {
e.Name = dir + "/" + prefix + base
}
for _, t := range tags {
exists := false
for _, experimentTag := range e.Tags {
if experimentTag.Key == t.Key {
exists = true
break
}
}
if !exists {
e.Tags = append(e.Tags, ml.ExperimentTag{Key: t.Key, Value: t.Value})
}
}
}

// Model serving endpoint presets: Prefix
for i := range r.ModelServingEndpoints {
r.ModelServingEndpoints[i].Name = normalizePrefix(prefix) + r.ModelServingEndpoints[i].Name

// As of 2024-06, model serving endpoints don't yet support tags
}

// Registered models presets: Prefix
for i := range r.RegisteredModels {
r.RegisteredModels[i].Name = normalizePrefix(prefix) + r.RegisteredModels[i].Name

// As of 2024-06, registered models don't yet support tags
}

// Quality monitors presets: Prefix
if t.TriggerPauseStatus == config.Paused {
for i := range r.QualityMonitors {
// Remove all schedules from monitors, since they don't support pausing/unpausing.
// Quality monitors might support the "pause" property in the future, so at the
// CLI level we do respect that property if it is set to "unpaused."
if r.QualityMonitors[i].Schedule != nil && r.QualityMonitors[i].Schedule.PauseStatus != catalog.MonitorCronSchedulePauseStatusUnpaused {
r.QualityMonitors[i].Schedule = nil
}
}
}

// Schemas: Prefix
for i := range r.Schemas {
prefix = "dev_" + b.Config.Workspace.CurrentUser.ShortName + "_"
r.Schemas[i].Name = prefix + r.Schemas[i].Name
// HTTP API for schemas doesn't yet support tags. It's only supported in
// the Databricks UI and via the SQL API.
}

return nil
}

// validatePauseStatus checks the presets' TriggerPauseStatus. An empty value,
// config.Paused and config.Unpaused are accepted and yield nil; any other
// value yields a single error diagnostic located at
// "presets.trigger_pause_status".
func validatePauseStatus(b *bundle.Bundle) diag.Diagnostics {
	switch b.Config.Presets.TriggerPauseStatus {
	case "", config.Paused, config.Unpaused:
		return nil
	}

	invalid := diag.Diagnostic{
		Summary:   "Invalid value for trigger_pause_status, should be PAUSED or UNPAUSED",
		Severity:  diag.Error,
		Locations: []dyn.Location{b.Config.GetLocation("presets.trigger_pause_status")},
	}
	return diag.Diagnostics{invalid}
}

// toTagArray converts a map of tags to a slice of tags.
// The result is sorted by key to ensure a stable ordering, since map
// iteration order is not. A nil or empty map yields a nil slice.
func toTagArray(tags map[string]string) []Tag {
	if len(tags) == 0 {
		return nil
	}

	result := make([]Tag, 0, len(tags))
	for k, v := range tags {
		result = append(result, Tag{Key: k, Value: v})
	}
	sort.Slice(result, func(a, b int) bool {
		return result[a].Key < result[b].Key
	})
	return result
}

// normalizePrefix normalizes prefixes like '[dev lennart] ' to 'dev_lennart_'.
// Opening brackets and surrounding spaces are stripped here; the remaining
// cleanup is delegated to textutil.NormalizeString, which is meant to keep
// unicode letters and numbers but remove all "special characters."
func normalizePrefix(prefix string) string {
	cleaned := strings.Trim(strings.ReplaceAll(prefix, "[", ""), " ")
	normalized := textutil.NormalizeString(cleaned)

	// If the prefix ends with a ']', we add an underscore to the end.
	// This makes sure that we get names like "dev_user_endpoint" instead of "dev_userendpoint"
	if strings.HasSuffix(cleaned, "]") {
		return normalized + "_"
	}
	return normalized
}
Loading