From 66f2ba64a8a479d45efb9b23eab096a5ffda1367 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 26 Sep 2024 14:55:07 +0200 Subject: [PATCH 1/2] Simplified isFullVariableOverrideDef implementation (#1791) ## Changes Simplified isFullVariableOverrideDef implementation Follow up on https://github.com/databricks/cli/pull/1787 --- bundle/config/root.go | 50 ++++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 32 deletions(-) diff --git a/bundle/config/root.go b/bundle/config/root.go index ff169e4cef..4b1467456d 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -406,7 +406,14 @@ func (r *Root) MergeTargetOverrides(name string) error { return r.updateWithDynamicValue(root) } -var variableKeywords = []string{"default", "lookup"} +var allowedVariableDefinitions = []([]string){ + {"default", "type", "description"}, + {"default", "type"}, + {"default", "description"}, + {"lookup", "description"}, + {"default"}, + {"lookup"}, +} // isFullVariableOverrideDef checks if the given value is a full syntax varaible override. // A full syntax variable override is a map with either 1 of 2 keys. @@ -423,42 +430,21 @@ func isFullVariableOverrideDef(v dyn.Value) bool { return false } - // If the map has 3 keys, they should be "description", "type" and "default" or "lookup" - if mv.Len() == 3 { - if _, ok := mv.GetByString("type"); ok { - if _, ok := mv.GetByString("description"); ok { - if _, ok := mv.GetByString("default"); ok { - return true - } - } - } - - return false - } - - // If the map has 2 keys, one of them should be "default" or "lookup" and the other is "type" or "description" - if mv.Len() == 2 { - if _, ok := mv.GetByString("type"); ok { - if _, ok := mv.GetByString("default"); ok { - return true - } + for _, keys := range allowedVariableDefinitions { + if len(keys) != mv.Len() { + continue } - if _, ok := mv.GetByString("description"); ok { - if _, ok := mv.GetByString("default"); ok { - return true - } - - if _, ok := mv.GetByString("lookup"); ok { - return true + // Check if the keys are the same. + match := true + for _, key := range keys { + if _, ok := mv.GetByString(key); !ok { + match = false + break } } - return false - } - - for _, keyword := range variableKeywords { - if _, ok := mv.GetByString(keyword); ok { + if match { return true } } From 4e8e02738081b74017bcb9d7b440e75ffa08c0d7 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Thu, 26 Sep 2024 18:52:22 +0530 Subject: [PATCH 2/2] Sort tasks by `task_key` before generating the Terraform configuration (#1776) ## Changes Sort the tasks in the resultant `bundle.tf.json`. This is important because configuration from one task can leak into another if the tasks are not sorted. For more details see: 1. https://github.com/databricks/terraform-provider-databricks/issues/3951 2. https://github.com/databricks/terraform-provider-databricks/issues/4011 ## Tests Unit test and manually. For manual testing I used the following configuration: ``` resources: jobs: foo: tasks: - task_key: task-Z notebook_task: notebook_path: nb.py source: GIT existing_cluster_id: 0715-133738-ju0ma84z - task_key: task-1 notebook_task: notebook_path: ${workspace.file_path}/local.py source: WORKSPACE existing_cluster_id: 0715-133738-ju0ma84z depends_on: - task_key: task-Z git_source: git_provider: gitHub git_url: https://github.com/shreyas-goenka/job-source-tmp.git git_branch: main ``` Steps (1): 1. Deploy this bundle. 2. Comment out "source: GIT" 3. Deploy again Before: Deploying this bundle twice would fail. This is because the "source: GIT" would carry over to the next deployment. After: There was no error on the subsequent deployment. Steps (2): 1. Deploy once 2. Deploy again Before: Works correctly but leads to a update API call every time. After: No diff is detected by terraform. --- bundle/deploy/terraform/convert.go | 5 +++ bundle/deploy/terraform/tfdyn/convert_job.go | 33 ++++++++++++++++++- .../terraform/tfdyn/convert_job_test.go | 30 ++++++++++++++--- 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/bundle/deploy/terraform/convert.go b/bundle/deploy/terraform/convert.go index 5a548e3b50..b8993c0317 100644 --- a/bundle/deploy/terraform/convert.go +++ b/bundle/deploy/terraform/convert.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "sort" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" @@ -82,6 +83,10 @@ func BundleToTerraform(config *config.Root) *schema.Root { conv(src, &dst) if src.JobSettings != nil { + sort.Slice(src.JobSettings.Tasks, func(i, j int) bool { + return src.JobSettings.Tasks[i].TaskKey < src.JobSettings.Tasks[j].TaskKey + }) + for _, v := range src.Tasks { var t schema.ResourceJobTask conv(v, &t) diff --git a/bundle/deploy/terraform/tfdyn/convert_job.go b/bundle/deploy/terraform/tfdyn/convert_job.go index d1e7e73e2c..8948e3bafd 100644 --- a/bundle/deploy/terraform/tfdyn/convert_job.go +++ b/bundle/deploy/terraform/tfdyn/convert_job.go @@ -3,6 +3,7 @@ package tfdyn import ( "context" "fmt" + "sort" "github.com/databricks/cli/bundle/internal/tf/schema" "github.com/databricks/cli/libs/dyn" @@ -19,8 +20,38 @@ func convertJobResource(ctx context.Context, vin dyn.Value) (dyn.Value, error) { log.Debugf(ctx, "job normalization diagnostic: %s", diag.Summary) } + // Sort the tasks of each job in the bundle by task key. Sorting + // the task keys ensures that the diff computed by terraform is correct and avoids + // recreates. For more details see the NOTE at + // https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/job#example-usage + // and https://github.com/databricks/terraform-provider-databricks/issues/4011 + // and https://github.com/databricks/cli/pull/1776 + vout := vin + var err error + tasks, ok := vin.Get("tasks").AsSequence() + if ok { + sort.Slice(tasks, func(i, j int) bool { + // We sort the tasks by their task key. Tasks without task keys are ordered + // before tasks with task keys. We do not error for those tasks + // since presence of a task_key is validated for in the Jobs backend. + tk1, ok := tasks[i].Get("task_key").AsString() + if !ok { + return true + } + tk2, ok := tasks[j].Get("task_key").AsString() + if !ok { + return false + } + return tk1 < tk2 + }) + vout, err = dyn.Set(vin, "tasks", dyn.V(tasks)) + if err != nil { + return dyn.InvalidValue, err + } + } + // Modify top-level keys. - vout, err := renameKeys(vin, map[string]string{ + vout, err = renameKeys(vout, map[string]string{ "tasks": "task", "job_clusters": "job_cluster", "parameters": "parameter", diff --git a/bundle/deploy/terraform/tfdyn/convert_job_test.go b/bundle/deploy/terraform/tfdyn/convert_job_test.go index b9e1f967f0..695b9ba249 100644 --- a/bundle/deploy/terraform/tfdyn/convert_job_test.go +++ b/bundle/deploy/terraform/tfdyn/convert_job_test.go @@ -42,8 +42,8 @@ func TestConvertJob(t *testing.T) { }, Tasks: []jobs.Task{ { - TaskKey: "task_key", - JobClusterKey: "job_cluster_key", + TaskKey: "task_key_b", + JobClusterKey: "job_cluster_key_b", Libraries: []compute.Library{ { Pypi: &compute.PythonPyPiLibrary{ @@ -55,6 +55,17 @@ func TestConvertJob(t *testing.T) { }, }, }, + { + TaskKey: "task_key_a", + JobClusterKey: "job_cluster_key_a", + }, + { + TaskKey: "task_key_c", + JobClusterKey: "job_cluster_key_c", + }, + { + Description: "missing task key 😱", + }, }, }, Permissions: []resources.Permission{ @@ -100,8 +111,15 @@ func TestConvertJob(t *testing.T) { }, "task": []any{ map[string]any{ - "task_key": "task_key", - "job_cluster_key": "job_cluster_key", + "description": "missing task key 😱", + }, + map[string]any{ + "task_key": "task_key_a", + "job_cluster_key": "job_cluster_key_a", + }, + map[string]any{ + "task_key": "task_key_b", + "job_cluster_key": "job_cluster_key_b", "library": []any{ map[string]any{ "pypi": map[string]any{ @@ -113,6 +131,10 @@ func TestConvertJob(t *testing.T) { }, }, }, + map[string]any{ + "task_key": "task_key_c", + "job_cluster_key": "job_cluster_key_c", + }, }, }, out.Job["my_job"])