From 4e8e02738081b74017bcb9d7b440e75ffa08c0d7 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Thu, 26 Sep 2024 18:52:22 +0530 Subject: [PATCH] Sort tasks by `task_key` before generating the Terraform configuration (#1776) ## Changes Sort the tasks in the resultant `bundle.tf.json`. This is important because configuration from one task can leak into another if the tasks are not sorted. For more details see: 1. https://github.com/databricks/terraform-provider-databricks/issues/3951 2. https://github.com/databricks/terraform-provider-databricks/issues/4011 ## Tests Unit test and manually. For manual testing I used the following configuration: ``` resources: jobs: foo: tasks: - task_key: task-Z notebook_task: notebook_path: nb.py source: GIT existing_cluster_id: 0715-133738-ju0ma84z - task_key: task-1 notebook_task: notebook_path: ${workspace.file_path}/local.py source: WORKSPACE existing_cluster_id: 0715-133738-ju0ma84z depends_on: - task_key: task-Z git_source: git_provider: gitHub git_url: https://github.com/shreyas-goenka/job-source-tmp.git git_branch: main ``` Steps (1): 1. Deploy this bundle. 2. Comment out "source: GIT" 3. Deploy again Before: Deploying this bundle twice would fail. This is because the "source: GIT" would carry over to the next deployment. After: There was no error on the subsequent deployment. Steps (2): 1. Deploy once 2. Deploy again Before: Works correctly but leads to a update API call every time. After: No diff is detected by terraform. --- bundle/deploy/terraform/convert.go | 5 +++ bundle/deploy/terraform/tfdyn/convert_job.go | 33 ++++++++++++++++++- .../terraform/tfdyn/convert_job_test.go | 30 ++++++++++++++--- 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/bundle/deploy/terraform/convert.go b/bundle/deploy/terraform/convert.go index 5a548e3b50..b8993c0317 100644 --- a/bundle/deploy/terraform/convert.go +++ b/bundle/deploy/terraform/convert.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "sort" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" @@ -82,6 +83,10 @@ func BundleToTerraform(config *config.Root) *schema.Root { conv(src, &dst) if src.JobSettings != nil { + sort.Slice(src.JobSettings.Tasks, func(i, j int) bool { + return src.JobSettings.Tasks[i].TaskKey < src.JobSettings.Tasks[j].TaskKey + }) + for _, v := range src.Tasks { var t schema.ResourceJobTask conv(v, &t) diff --git a/bundle/deploy/terraform/tfdyn/convert_job.go b/bundle/deploy/terraform/tfdyn/convert_job.go index d1e7e73e2c..8948e3bafd 100644 --- a/bundle/deploy/terraform/tfdyn/convert_job.go +++ b/bundle/deploy/terraform/tfdyn/convert_job.go @@ -3,6 +3,7 @@ package tfdyn import ( "context" "fmt" + "sort" "github.com/databricks/cli/bundle/internal/tf/schema" "github.com/databricks/cli/libs/dyn" @@ -19,8 +20,38 @@ func convertJobResource(ctx context.Context, vin dyn.Value) (dyn.Value, error) { log.Debugf(ctx, "job normalization diagnostic: %s", diag.Summary) } + // Sort the tasks of each job in the bundle by task key. Sorting + // the task keys ensures that the diff computed by terraform is correct and avoids + // recreates. For more details see the NOTE at + // https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/job#example-usage + // and https://github.com/databricks/terraform-provider-databricks/issues/4011 + // and https://github.com/databricks/cli/pull/1776 + vout := vin + var err error + tasks, ok := vin.Get("tasks").AsSequence() + if ok { + sort.Slice(tasks, func(i, j int) bool { + // We sort the tasks by their task key. Tasks without task keys are ordered + // before tasks with task keys. We do not error for those tasks + // since presence of a task_key is validated for in the Jobs backend. + tk1, ok := tasks[i].Get("task_key").AsString() + if !ok { + return true + } + tk2, ok := tasks[j].Get("task_key").AsString() + if !ok { + return false + } + return tk1 < tk2 + }) + vout, err = dyn.Set(vin, "tasks", dyn.V(tasks)) + if err != nil { + return dyn.InvalidValue, err + } + } + // Modify top-level keys. - vout, err := renameKeys(vin, map[string]string{ + vout, err = renameKeys(vout, map[string]string{ "tasks": "task", "job_clusters": "job_cluster", "parameters": "parameter", diff --git a/bundle/deploy/terraform/tfdyn/convert_job_test.go b/bundle/deploy/terraform/tfdyn/convert_job_test.go index b9e1f967f0..695b9ba249 100644 --- a/bundle/deploy/terraform/tfdyn/convert_job_test.go +++ b/bundle/deploy/terraform/tfdyn/convert_job_test.go @@ -42,8 +42,8 @@ func TestConvertJob(t *testing.T) { }, Tasks: []jobs.Task{ { - TaskKey: "task_key", - JobClusterKey: "job_cluster_key", + TaskKey: "task_key_b", + JobClusterKey: "job_cluster_key_b", Libraries: []compute.Library{ { Pypi: &compute.PythonPyPiLibrary{ @@ -55,6 +55,17 @@ func TestConvertJob(t *testing.T) { }, }, }, + { + TaskKey: "task_key_a", + JobClusterKey: "job_cluster_key_a", + }, + { + TaskKey: "task_key_c", + JobClusterKey: "job_cluster_key_c", + }, + { + Description: "missing task key 😱", + }, }, }, Permissions: []resources.Permission{ @@ -100,8 +111,15 @@ func TestConvertJob(t *testing.T) { }, "task": []any{ map[string]any{ - "task_key": "task_key", - "job_cluster_key": "job_cluster_key", + "description": "missing task key 😱", + }, + map[string]any{ + "task_key": "task_key_a", + "job_cluster_key": "job_cluster_key_a", + }, + map[string]any{ + "task_key": "task_key_b", + "job_cluster_key": "job_cluster_key_b", "library": []any{ map[string]any{ "pypi": map[string]any{ @@ -113,6 +131,10 @@ func TestConvertJob(t *testing.T) { }, }, }, + map[string]any{ + "task_key": "task_key_c", + "job_cluster_key": "job_cluster_key_c", + }, }, }, out.Job["my_job"])