From edc08149d3d6a4943072f23492fce494082edf90 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 27 Aug 2024 14:51:10 +0200 Subject: [PATCH 01/22] Disable prompt for storage-credentials get command (#1723) ## Changes Fixes #1079 --- .codegen/service.go.tmpl | 1 + .../storage-credentials.go | 22 +++++-------------- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/.codegen/service.go.tmpl b/.codegen/service.go.tmpl index 111745e4f2..281dfd6eb8 100644 --- a/.codegen/service.go.tmpl +++ b/.codegen/service.go.tmpl @@ -154,6 +154,7 @@ func new{{.PascalName}}() *cobra.Command { "provider-exchanges delete-listing-from-exchange" "provider-exchanges list-exchanges-for-listing" "provider-exchanges list-listings-for-exchange" + "storage-credentials get" -}} {{- $fullCommandName := (print $serviceName " " .KebabName) -}} {{- $noPrompt := or .IsCrudCreate (in $excludeFromPrompts $fullCommandName) }} diff --git a/cmd/workspace/storage-credentials/storage-credentials.go b/cmd/workspace/storage-credentials/storage-credentials.go index 18656a61c1..f4ec5eb4f4 100755 --- a/cmd/workspace/storage-credentials/storage-credentials.go +++ b/cmd/workspace/storage-credentials/storage-credentials.go @@ -241,28 +241,16 @@ func newGet() *cobra.Command { cmd.Annotations = make(map[string]string) + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(1) + return check(cmd, args) + } + cmd.PreRunE = root.MustWorkspaceClient cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { ctx := cmd.Context() w := root.WorkspaceClient(ctx) - if len(args) == 0 { - promptSpinner := cmdio.Spinner(ctx) - promptSpinner <- "No NAME argument specified. Loading names for Storage Credentials drop-down." - names, err := w.StorageCredentials.StorageCredentialInfoNameToIdMap(ctx, catalog.ListStorageCredentialsRequest{}) - close(promptSpinner) - if err != nil { - return fmt.Errorf("failed to load names for Storage Credentials drop-down. Please manually specify required arguments. Original error: %w", err) - } - id, err := cmdio.Select(ctx, names, "Name of the storage credential") - if err != nil { - return err - } - args = append(args, id) - } - if len(args) != 1 { - return fmt.Errorf("expected to have name of the storage credential") - } getReq.Name = args[0] response, err := w.StorageCredentials.Get(ctx, getReq) From 70363836d5a462851d8d2196eb288d7bdb1ad01d Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 28 Aug 2024 13:39:06 +0200 Subject: [PATCH 02/22] Correctly mark PyPI package name specs with multiple specifiers as remote libraries (#1725) Correctly mark pypi package name specs with multiple specifiers as remote libraries Fixes this https://github.com/databricks/cli/issues/1728 --- bundle/libraries/local_path.go | 6 ++++-- bundle/libraries/local_path_test.go | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/bundle/libraries/local_path.go b/bundle/libraries/local_path.go index e49562405a..6d60d56bc6 100644 --- a/bundle/libraries/local_path.go +++ b/bundle/libraries/local_path.go @@ -72,9 +72,11 @@ func IsLibraryLocal(dep string) bool { // ^[a-zA-Z0-9\-_]+: Matches the package name, allowing alphanumeric characters, dashes (-), and underscores (_). // \[.*\])?: Optionally matches any extras specified in square brackets, e.g., [security]. -// ((==|!=|<=|>=|~=|>|<)\d+(\.\d+){0,2}(\.\*)?)?: Optionally matches version specifiers, supporting various operators (==, !=, etc.) followed by a version number (e.g., 2.25.1). 
+// ((==|!=|<=|>=|~=|>|<)\d+(\.\d+){0,2}(\.\*)?): Optionally matches version specifiers, supporting various operators (==, !=, etc.) followed by a version number (e.g., 2.25.1). +// ,?: Optionally matches a comma (,) at the end of the specifier which is used to separate multiple specifiers. +// There can be multiple version specifiers separated by commas or no specifiers. // Spec for package name and version specifier: https://pip.pypa.io/en/stable/reference/requirement-specifiers/ -var packageRegex = regexp.MustCompile(`^[a-zA-Z0-9\-_]+\s?(\[.*\])?\s?((==|!=|<=|>=|~=|==|>|<)\s?\d+(\.\d+){0,2}(\.\*)?)?$`) +var packageRegex = regexp.MustCompile(`^[a-zA-Z0-9\-_]+\s?(\[.*\])?\s?((==|!=|<=|>=|~=|==|>|<)\s?\d+(\.\d+){0,2}(\.\*)?,?)*$`) func isPackage(name string) bool { if packageRegex.MatchString(name) { diff --git a/bundle/libraries/local_path_test.go b/bundle/libraries/local_path_test.go index 667d64ec87..dc157cad8d 100644 --- a/bundle/libraries/local_path_test.go +++ b/bundle/libraries/local_path_test.go @@ -62,6 +62,8 @@ func TestIsLibraryLocal(t *testing.T) { {path: "beautifulsoup4 ~= 4.12.3", expected: false}, {path: "beautifulsoup4[security, tests]", expected: false}, {path: "beautifulsoup4[security, tests] ~= 4.12.3", expected: false}, + {path: "beautifulsoup4>=1.0.0,<2.0.0", expected: false}, + {path: "beautifulsoup4>=1.0.0,~=1.2.0,<2.0.0", expected: false}, {path: "https://github.com/pypa/pip/archive/22.0.2.zip", expected: false}, {path: "pip @ https://github.com/pypa/pip/archive/22.0.2.zip", expected: false}, {path: "requests [security] @ https://github.com/psf/requests/archive/refs/heads/main.zip", expected: false}, From 85459c1963e6e86f3ece1029caae51156f6b87f6 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Wed, 28 Aug 2024 14:14:19 +0200 Subject: [PATCH 03/22] Improve error handling for /Volumes paths in mode: development (#1716) ## Changes * Provide a more helpful error when using an artifact_path based on /Volumes * Allow the use of short_names in /Volumes paths ## Example cases Example of a valid /Volumes artifact_path: * `artifact_path: /Volumes/catalog/schema/${workspace.current_user.short_name}/libs` Example of an invalid /Volumes path (when using `mode: development`): * `artifact_path: /Volumes/catalog/schema/libs` * Resulting error: `artifact_path should contain the current username or ${workspace.current_user.short_name} to ensure uniqueness when using 'mode: development'` --- bundle/config/mutator/process_target_mode.go | 35 +++++++++++++------ .../mutator/process_target_mode_test.go | 12 ++++++- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/bundle/config/mutator/process_target_mode.go b/bundle/config/mutator/process_target_mode.go index 92ed286899..70382f054b 100644 --- a/bundle/config/mutator/process_target_mode.go +++ b/bundle/config/mutator/process_target_mode.go @@ -64,6 +64,7 @@ func transformDevelopmentMode(ctx context.Context, b *bundle.Bundle) { } func validateDevelopmentMode(b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics p := b.Config.Presets u := b.Config.Workspace.CurrentUser @@ -74,44 +75,56 @@ func validateDevelopmentMode(b *bundle.Bundle) diag.Diagnostics { // status to UNPAUSED at the level of an individual object, whic hwas // historically allowed.) 
if p.TriggerPauseStatus == config.Unpaused { - return diag.Diagnostics{{ + diags = diags.Append(diag.Diagnostic{ Severity: diag.Error, Summary: "target with 'mode: development' cannot set trigger pause status to UNPAUSED by default", Locations: []dyn.Location{b.Config.GetLocation("presets.trigger_pause_status")}, - }} + }) } // Make sure this development copy has unique names and paths to avoid conflicts if path := findNonUserPath(b); path != "" { - return diag.Errorf("%s must start with '~/' or contain the current username when using 'mode: development'", path) + if path == "artifact_path" && strings.HasPrefix(b.Config.Workspace.ArtifactPath, "/Volumes") { + // For Volumes paths we recommend including the current username as a substring + diags = diags.Extend(diag.Errorf("%s should contain the current username or ${workspace.current_user.short_name} to ensure uniqueness when using 'mode: development'", path)) + } else { + // For non-Volumes paths recommend simply putting things in the home folder + diags = diags.Extend(diag.Errorf("%s must start with '~/' or contain the current username to ensure uniqueness when using 'mode: development'", path)) + } } if p.NamePrefix != "" && !strings.Contains(p.NamePrefix, u.ShortName) && !strings.Contains(p.NamePrefix, u.UserName) { // Resources such as pipelines require a unique name, e.g. '[dev steve] my_pipeline'. // For this reason we require the name prefix to contain the current username; // it's a pitfall for users if they don't include it and later find out that // only a single user can do development deployments. - return diag.Diagnostics{{ + diags = diags.Append(diag.Diagnostic{ Severity: diag.Error, Summary: "prefix should contain the current username or ${workspace.current_user.short_name} to ensure uniqueness when using 'mode: development'", Locations: []dyn.Location{b.Config.GetLocation("presets.name_prefix")}, - }} + }) } - return nil + return diags } +// findNonUserPath finds the first workspace path such as root_path that doesn't +// contain the current username or current user's shortname. 
func findNonUserPath(b *bundle.Bundle) string { - username := b.Config.Workspace.CurrentUser.UserName + containsName := func(path string) bool { + username := b.Config.Workspace.CurrentUser.UserName + shortname := b.Config.Workspace.CurrentUser.ShortName + return strings.Contains(path, username) || strings.Contains(path, shortname) + } - if b.Config.Workspace.RootPath != "" && !strings.Contains(b.Config.Workspace.RootPath, username) { + if b.Config.Workspace.RootPath != "" && !containsName(b.Config.Workspace.RootPath) { return "root_path" } - if b.Config.Workspace.StatePath != "" && !strings.Contains(b.Config.Workspace.StatePath, username) { + if b.Config.Workspace.StatePath != "" && !containsName(b.Config.Workspace.StatePath) { return "state_path" } - if b.Config.Workspace.FilePath != "" && !strings.Contains(b.Config.Workspace.FilePath, username) { + if b.Config.Workspace.FilePath != "" && !containsName(b.Config.Workspace.FilePath) { return "file_path" } - if b.Config.Workspace.ArtifactPath != "" && !strings.Contains(b.Config.Workspace.ArtifactPath, username) { + if b.Config.Workspace.ArtifactPath != "" && !containsName(b.Config.Workspace.ArtifactPath) { return "artifact_path" } return "" diff --git a/bundle/config/mutator/process_target_mode_test.go b/bundle/config/mutator/process_target_mode_test.go index 1c8671b4c5..42f1929c89 100644 --- a/bundle/config/mutator/process_target_mode_test.go +++ b/bundle/config/mutator/process_target_mode_test.go @@ -230,10 +230,20 @@ func TestValidateDevelopmentMode(t *testing.T) { diags := validateDevelopmentMode(b) require.NoError(t, diags.Error()) + // Test with /Volumes path + b = mockBundle(config.Development) + b.Config.Workspace.ArtifactPath = "/Volumes/catalog/schema/lennart/libs" + diags = validateDevelopmentMode(b) + require.NoError(t, diags.Error()) + b.Config.Workspace.ArtifactPath = "/Volumes/catalog/schema/libs" + diags = validateDevelopmentMode(b) + require.ErrorContains(t, diags.Error(), "artifact_path should contain the current username or ${workspace.current_user.short_name} to ensure uniqueness when using 'mode: development'") + // Test with a bundle that has a non-user path + b = mockBundle(config.Development) b.Config.Workspace.RootPath = "/Shared/.bundle/x/y/state" diags = validateDevelopmentMode(b) - require.ErrorContains(t, diags.Error(), "root_path") + require.ErrorContains(t, diags.Error(), "root_path must start with '~/' or contain the current username to ensure uniqueness when using 'mode: development'") // Test with a bundle that has an unpaused trigger pause status b = mockBundle(config.Development) From 7dcc791b05905a0ac78345651320969212f0123f Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 28 Aug 2024 15:09:07 +0200 Subject: [PATCH 04/22] [Release] Release v0.227.1 (#1729) CLI: * Disable prompt for storage-credentials get command ([#1723](https://github.com/databricks/cli/pull/1723)). Bundles: * Do not treat empty path as a local path ([#1717](https://github.com/databricks/cli/pull/1717)). * Correctly mark PyPI package name specs with multiple specifiers as remote libraries ([#1725](https://github.com/databricks/cli/pull/1725)). * Improve error handling for /Volumes paths in mode: development ([#1716](https://github.com/databricks/cli/pull/1716)). Internal: * Ignore CLI version check on development builds of the CLI ([#1714](https://github.com/databricks/cli/pull/1714)). API Changes: * Added `databricks resource-quotas` command group. * Added `databricks policy-compliance-for-clusters` command group. 
* Added `databricks policy-compliance-for-jobs` command group. OpenAPI commit 3eae49b444cac5a0118a3503e5b7ecef7f96527a (2024-08-21) Dependency updates: * Bump github.com/databricks/databricks-sdk-go from 0.44.0 to 0.45.0 ([#1719](https://github.com/databricks/cli/pull/1719)). * Revert hc-install version to 0.7.0 ([#1711](https://github.com/databricks/cli/pull/1711)). --- CHANGELOG.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 88a62d0987..fac7d597d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,28 @@ # Version changelog +## [Release] Release v0.227.1 + +CLI: + * Disable prompt for storage-credentials get command ([#1723](https://github.com/databricks/cli/pull/1723)). + +Bundles: + * Do not treat empty path as a local path ([#1717](https://github.com/databricks/cli/pull/1717)). + * Correctly mark PyPI package name specs with multiple specifiers as remote libraries ([#1725](https://github.com/databricks/cli/pull/1725)). + * Improve error handling for /Volumes paths in mode: development ([#1716](https://github.com/databricks/cli/pull/1716)). + +Internal: + * Ignore CLI version check on development builds of the CLI ([#1714](https://github.com/databricks/cli/pull/1714)). + +API Changes: + * Added `databricks resource-quotas` command group. + * Added `databricks policy-compliance-for-clusters` command group. + * Added `databricks policy-compliance-for-jobs` command group. + +OpenAPI commit 3eae49b444cac5a0118a3503e5b7ecef7f96527a (2024-08-21) +Dependency updates: + * Bump github.com/databricks/databricks-sdk-go from 0.44.0 to 0.45.0 ([#1719](https://github.com/databricks/cli/pull/1719)). + * Revert hc-install version to 0.7.0 ([#1711](https://github.com/databricks/cli/pull/1711)). + ## [Release] Release v0.227.0 CLI: From 43ace69bb955a445885d4477a55de88c237a05ba Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 29 Aug 2024 14:47:44 +0200 Subject: [PATCH 05/22] Consider serverless clusters as compatible for Python wheel tasks (#1733) ## Changes Consider serverless clusters as compatible for Python wheel tasks. 
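For illustration, a hedged sketch of the relaxed version check (simplified from the `lowerThanExpectedVersion` change in the diff below; the real code compares via a semver package, and the helper names here are illustrative):

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// lowerThanExpectedVersion reports whether a spark_version is known to be
// below DBR 13.1. Serverless-style versions such as "client.1.10-scala2.12"
// have a non-numeric major part and are treated as compatible.
func lowerThanExpectedVersion(sparkVersion string) bool {
	parts := strings.Split(sparkVersion, ".")
	if len(parts) < 2 {
		return false
	}
	if strings.HasPrefix(parts[1], "x") {
		// Treat versions like "13.x" as the very latest minor (13.99).
		parts[1] = "99"
	}
	major, err := strconv.Atoi(parts[0])
	if err != nil {
		return false // non-numeric part: assume compatible
	}
	minor, err := strconv.Atoi(parts[1])
	if err != nil {
		return false // non-numeric part: assume compatible
	}
	return major < 13 || (major == 13 && minor < 1)
}

func main() {
	fmt.Println(lowerThanExpectedVersion("client.1.10-scala2.12")) // false (serverless)
	fmt.Println(lowerThanExpectedVersion("10.4.x-scala2.12"))      // true
}
```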
Fixes a `Python wheel tasks require compute with DBR 13.3+ to include local libraries` warning shown for serverless clusters --- bundle/python/warning.go | 20 ++++++++++++++++---- bundle/python/warning_test.go | 4 +++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/bundle/python/warning.go b/bundle/python/warning.go index d53796d734..0e9d8bef0b 100644 --- a/bundle/python/warning.go +++ b/bundle/python/warning.go @@ -2,6 +2,7 @@ package python import ( "context" + "strconv" "strings" "github.com/databricks/cli/bundle" @@ -38,7 +39,7 @@ func hasIncompatibleWheelTasks(ctx context.Context, b *bundle.Bundle) bool { tasks := libraries.FindTasksWithLocalLibraries(b) for _, task := range tasks { if task.NewCluster != nil { - if lowerThanExpectedVersion(ctx, task.NewCluster.SparkVersion) { + if lowerThanExpectedVersion(task.NewCluster.SparkVersion) { return true } } @@ -47,7 +48,7 @@ func hasIncompatibleWheelTasks(ctx context.Context, b *bundle.Bundle) bool { for _, job := range b.Config.Resources.Jobs { for _, cluster := range job.JobClusters { if task.JobClusterKey == cluster.JobClusterKey && cluster.NewCluster.SparkVersion != "" { - if lowerThanExpectedVersion(ctx, cluster.NewCluster.SparkVersion) { + if lowerThanExpectedVersion(cluster.NewCluster.SparkVersion) { return true } } @@ -64,7 +65,7 @@ func hasIncompatibleWheelTasks(ctx context.Context, b *bundle.Bundle) bool { return false } - if lowerThanExpectedVersion(ctx, version) { + if lowerThanExpectedVersion(version) { return true } } @@ -73,7 +74,7 @@ func hasIncompatibleWheelTasks(ctx context.Context, b *bundle.Bundle) bool { return false } -func lowerThanExpectedVersion(ctx context.Context, sparkVersion string) bool { +func lowerThanExpectedVersion(sparkVersion string) bool { parts := strings.Split(sparkVersion, ".") if len(parts) < 2 { return false @@ -82,6 +83,17 @@ func lowerThanExpectedVersion(ctx context.Context, sparkVersion string) bool { if parts[1][0] == 'x' { // treat versions like 13.x as the very latest minor (13.99) parts[1] = "99" } + + // if any of the version parts are not numbers, we can't compare + // so consider it as compatible version + if _, err := strconv.Atoi(parts[0]); err != nil { + return false + } + + if _, err := strconv.Atoi(parts[1]); err != nil { + return false + } + v := "v" + parts[0] + "." + parts[1] return semver.Compare(v, "v13.1") < 0 } diff --git a/bundle/python/warning_test.go b/bundle/python/warning_test.go index b2296392ba..a5ab75632f 100644 --- a/bundle/python/warning_test.go +++ b/bundle/python/warning_test.go @@ -344,6 +344,8 @@ func TestSparkVersionLowerThanExpected(t *testing.T) { "14.1.x-scala2.12": false, "13.x-snapshot-scala-2.12": false, "13.x-rc-scala-2.12": false, + "client.1.10-scala2.12": false, + "latest-stable-gpu-scala2.11": false, "10.4.x-aarch64-photon-scala2.12": true, "10.4.x-scala2.12": true, "13.0.x-scala2.12": true, @@ -351,7 +353,7 @@ func TestSparkVersionLowerThanExpected(t *testing.T) { } for k, v := range testCases { - result := lowerThanExpectedVersion(context.Background(), k) + result := lowerThanExpectedVersion(k) require.Equal(t, v, result, k) } } From 0f4891f0fef96fab6bffce7572caa2b97615911a Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 29 Aug 2024 15:02:34 +0200 Subject: [PATCH 06/22] Add `dyn.Time` to box a timestamp with its original string value (#1732) ## Changes If not explicitly quoted, the YAML loader interprets a value like `2024-08-29` as a timestamp. Such a value is usually intended to be a string instead. 
Our normalization logic was not able to turn a time value back into the original string. This change boxes the time value to include its original string representation. Normalization of one of these values into a string can now use the original input value. ## Tests Unit tests in `libs/dyn/convert`. --- libs/dyn/convert/normalize.go | 2 + libs/dyn/convert/normalize_test.go | 8 ++++ libs/dyn/kind.go | 3 +- libs/dyn/merge/override_test.go | 12 +++--- libs/dyn/time.go | 62 ++++++++++++++++++++++++++++++ libs/dyn/time_test.go | 41 ++++++++++++++++++++ libs/dyn/value.go | 3 +- libs/dyn/value_underlying.go | 7 ++-- libs/dyn/value_underlying_test.go | 2 +- libs/dyn/yamlloader/loader.go | 15 ++------ libs/dyn/yamlsaver/saver.go | 2 +- libs/dyn/yamlsaver/saver_test.go | 9 +++-- 12 files changed, 136 insertions(+), 30 deletions(-) create mode 100644 libs/dyn/time.go create mode 100644 libs/dyn/time_test.go diff --git a/libs/dyn/convert/normalize.go b/libs/dyn/convert/normalize.go index c80a914f14..bc80a150a1 100644 --- a/libs/dyn/convert/normalize.go +++ b/libs/dyn/convert/normalize.go @@ -267,6 +267,8 @@ func (n normalizeOptions) normalizeString(typ reflect.Type, src dyn.Value, path out = strconv.FormatInt(src.MustInt(), 10) case dyn.KindFloat: out = strconv.FormatFloat(src.MustFloat(), 'f', -1, 64) + case dyn.KindTime: + out = src.MustTime().String() case dyn.KindNil: // Return a warning if the field is present but has a null value. return dyn.InvalidValue, diags.Append(nullWarning(dyn.KindString, src, path)) diff --git a/libs/dyn/convert/normalize_test.go b/libs/dyn/convert/normalize_test.go index c2256615e9..4b2a3c189d 100644 --- a/libs/dyn/convert/normalize_test.go +++ b/libs/dyn/convert/normalize_test.go @@ -569,6 +569,14 @@ func TestNormalizeStringFromFloat(t *testing.T) { assert.Equal(t, dyn.NewValue("1.2", vin.Locations()), vout) } +func TestNormalizeStringFromTime(t *testing.T) { + var typ string + vin := dyn.NewValue(dyn.MustTime("2024-08-29"), []dyn.Location{{File: "file", Line: 1, Column: 1}}) + vout, err := Normalize(&typ, vin) + assert.Empty(t, err) + assert.Equal(t, dyn.NewValue("2024-08-29", vin.Locations()), vout) +} + func TestNormalizeStringError(t *testing.T) { var typ string vin := dyn.V(map[string]dyn.Value{"an": dyn.V("error")}) diff --git a/libs/dyn/kind.go b/libs/dyn/kind.go index 9d507fbc52..9c0c144231 100644 --- a/libs/dyn/kind.go +++ b/libs/dyn/kind.go @@ -2,7 +2,6 @@ package dyn import ( "fmt" - "time" ) type Kind int @@ -34,7 +33,7 @@ func kindOf(v any) Kind { return KindInt case float32, float64: return KindFloat - case time.Time: + case Time: return KindTime case nil: return KindNil diff --git a/libs/dyn/merge/override_test.go b/libs/dyn/merge/override_test.go index 9d41a526ea..264c32e5e2 100644 --- a/libs/dyn/merge/override_test.go +++ b/libs/dyn/merge/override_test.go @@ -83,16 +83,16 @@ func TestOverride_Primitive(t *testing.T) { { name: "time (updated)", state: visitorState{updated: []string{"root"}}, - left: dyn.NewValue(time.UnixMilli(10000), []dyn.Location{leftLocation}), - right: dyn.NewValue(time.UnixMilli(10001), []dyn.Location{rightLocation}), - expected: dyn.NewValue(time.UnixMilli(10001), []dyn.Location{rightLocation}), + left: dyn.NewValue(dyn.FromTime(time.UnixMilli(10000)), []dyn.Location{leftLocation}), + right: dyn.NewValue(dyn.FromTime(time.UnixMilli(10001)), []dyn.Location{rightLocation}), + expected: dyn.NewValue(dyn.FromTime(time.UnixMilli(10001)), []dyn.Location{rightLocation}), }, { name: "time (not updated)", state: visitorState{}, - left: 
dyn.NewValue(time.UnixMilli(10000), []dyn.Location{leftLocation}), - right: dyn.NewValue(time.UnixMilli(10000), []dyn.Location{rightLocation}), - expected: dyn.NewValue(time.UnixMilli(10000), []dyn.Location{leftLocation}), + left: dyn.NewValue(dyn.FromTime(time.UnixMilli(10000)), []dyn.Location{leftLocation}), + right: dyn.NewValue(dyn.FromTime(time.UnixMilli(10000)), []dyn.Location{rightLocation}), + expected: dyn.NewValue(dyn.FromTime(time.UnixMilli(10000)), []dyn.Location{leftLocation}), }, { name: "different types (updated)", diff --git a/libs/dyn/time.go b/libs/dyn/time.go new file mode 100644 index 0000000000..b3b3de12c0 --- /dev/null +++ b/libs/dyn/time.go @@ -0,0 +1,62 @@ +package dyn + +import ( + "fmt" + "time" +) + +// Time represents a time-like primitive value. +// +// It represents a timestamp and includes the original string value +// that was parsed to create the timestamp. This makes it possible +// to coalesce a value that YAML interprets as a timestamp back into +// a string without losing information. +type Time struct { + t time.Time + s string +} + +// NewTime creates a new Time from the given string. +func NewTime(str string) (Time, error) { + // Try a couple of layouts + for _, layout := range []string{ + "2006-1-2T15:4:5.999999999Z07:00", // RCF3339Nano with short date fields. + "2006-1-2t15:4:5.999999999Z07:00", // RFC3339Nano with short date fields and lower-case "t". + "2006-1-2 15:4:5.999999999", // space separated with no time zone + "2006-1-2", // date only + } { + t, terr := time.Parse(layout, str) + if terr == nil { + return Time{t: t, s: str}, nil + } + } + + return Time{}, fmt.Errorf("invalid time value: %q", str) +} + +// MustTime creates a new Time from the given string. +// It panics if the string cannot be parsed. +func MustTime(str string) Time { + t, err := NewTime(str) + if err != nil { + panic(err) + } + return t +} + +// FromTime creates a new Time from the given time.Time. +// It uses the RFC3339Nano format for its string representation. +// This guarantees that it can roundtrip into a string without losing information. +func FromTime(t time.Time) Time { + return Time{t: t, s: t.Format(time.RFC3339Nano)} +} + +// Time returns the time.Time value. +func (t Time) Time() time.Time { + return t.t +} + +// String returns the original string value that was parsed to create the timestamp. +func (t Time) String() string { + return t.s +} diff --git a/libs/dyn/time_test.go b/libs/dyn/time_test.go new file mode 100644 index 0000000000..653d6855f0 --- /dev/null +++ b/libs/dyn/time_test.go @@ -0,0 +1,41 @@ +package dyn_test + +import ( + "testing" + "time" + + "github.com/databricks/cli/libs/dyn" + assert "github.com/databricks/cli/libs/dyn/dynassert" +) + +func TestTimeValid(t *testing.T) { + for _, tc := range []string{ + "2024-08-29", + "2024-01-15T12:34:56.789012345Z", + } { + tm, err := dyn.NewTime(tc) + if assert.NoError(t, err) { + assert.NotEqual(t, time.Time{}, tm.Time()) + assert.Equal(t, tc, tm.String()) + } + } +} + +func TestTimeInvalid(t *testing.T) { + tm, err := dyn.NewTime("invalid") + assert.Error(t, err) + assert.Equal(t, dyn.Time{}, tm) +} + +func TestTimeFromTime(t *testing.T) { + tref := time.Now() + t1 := dyn.FromTime(tref) + + // Verify that the underlying value is the same. + assert.Equal(t, tref, t1.Time()) + + // Verify that the string representation can be used to construct the same. 
+ t2, err := dyn.NewTime(t1.String()) + assert.NoError(t, err) + assert.True(t, t1.Time().Equal(t2.Time())) +} diff --git a/libs/dyn/value.go b/libs/dyn/value.go index 2aed2f6cd3..81524db7ec 100644 --- a/libs/dyn/value.go +++ b/libs/dyn/value.go @@ -127,7 +127,8 @@ func (v Value) AsAny() any { case KindFloat: return v.v case KindTime: - return v.v + t := v.v.(Time) + return t.Time() default: // Panic because we only want to deal with known types. panic(fmt.Sprintf("invalid kind: %d", v.k)) diff --git a/libs/dyn/value_underlying.go b/libs/dyn/value_underlying.go index 2f0f26a1f6..0a867375d0 100644 --- a/libs/dyn/value_underlying.go +++ b/libs/dyn/value_underlying.go @@ -2,7 +2,6 @@ package dyn import ( "fmt" - "time" ) // AsMap returns the underlying mapping if this value is a map, @@ -123,14 +122,14 @@ func (v Value) MustFloat() float64 { // AsTime returns the underlying time if this value is a time, // the zero value and false otherwise. -func (v Value) AsTime() (time.Time, bool) { - vv, ok := v.v.(time.Time) +func (v Value) AsTime() (Time, bool) { + vv, ok := v.v.(Time) return vv, ok } // MustTime returns the underlying time if this value is a time, // panics otherwise. -func (v Value) MustTime() time.Time { +func (v Value) MustTime() Time { vv, ok := v.AsTime() if !ok || v.k != KindTime { panic(fmt.Sprintf("expected kind %s, got %s", KindTime, v.k)) diff --git a/libs/dyn/value_underlying_test.go b/libs/dyn/value_underlying_test.go index e35cde5823..73baeeeb2d 100644 --- a/libs/dyn/value_underlying_test.go +++ b/libs/dyn/value_underlying_test.go @@ -143,7 +143,7 @@ func TestValueUnderlyingFloat(t *testing.T) { } func TestValueUnderlyingTime(t *testing.T) { - v := dyn.V(time.Now()) + v := dyn.V(dyn.FromTime(time.Now())) vv1, ok := v.AsTime() assert.True(t, ok) diff --git a/libs/dyn/yamlloader/loader.go b/libs/dyn/yamlloader/loader.go index fbb52b5045..c3e8d08107 100644 --- a/libs/dyn/yamlloader/loader.go +++ b/libs/dyn/yamlloader/loader.go @@ -5,7 +5,6 @@ import ( "math" "strconv" "strings" - "time" "github.com/databricks/cli/libs/dyn" "gopkg.in/yaml.v3" @@ -207,17 +206,9 @@ func (d *loader) loadScalar(node *yaml.Node, loc dyn.Location) (dyn.Value, error case "!!null": return dyn.NewValue(nil, []dyn.Location{loc}), nil case "!!timestamp": - // Try a couple of layouts - for _, layout := range []string{ - "2006-1-2T15:4:5.999999999Z07:00", // RCF3339Nano with short date fields. - "2006-1-2t15:4:5.999999999Z07:00", // RFC3339Nano with short date fields and lower-case "t". 
- "2006-1-2 15:4:5.999999999", // space separated with no time zone - "2006-1-2", // date only - } { - t, terr := time.Parse(layout, node.Value) - if terr == nil { - return dyn.NewValue(t, []dyn.Location{loc}), nil - } + t, err := dyn.NewTime(node.Value) + if err == nil { + return dyn.NewValue(t, []dyn.Location{loc}), nil } return dyn.InvalidValue, errorf(loc, "invalid timestamp value: %v", node.Value) default: diff --git a/libs/dyn/yamlsaver/saver.go b/libs/dyn/yamlsaver/saver.go index fe4cfb8549..f4c7157f2e 100644 --- a/libs/dyn/yamlsaver/saver.go +++ b/libs/dyn/yamlsaver/saver.go @@ -129,7 +129,7 @@ func (s *saver) toYamlNodeWithStyle(v dyn.Value, style yaml.Style) (*yaml.Node, case dyn.KindFloat: return &yaml.Node{Kind: yaml.ScalarNode, Value: fmt.Sprint(v.MustFloat()), Style: style}, nil case dyn.KindTime: - return &yaml.Node{Kind: yaml.ScalarNode, Value: v.MustTime().UTC().String(), Style: style}, nil + return &yaml.Node{Kind: yaml.ScalarNode, Value: v.MustTime().String(), Style: style}, nil default: // Panic because we only want to deal with known types. panic(fmt.Sprintf("invalid kind: %d", v.Kind())) diff --git a/libs/dyn/yamlsaver/saver_test.go b/libs/dyn/yamlsaver/saver_test.go index 387090104f..aa481c20b7 100644 --- a/libs/dyn/yamlsaver/saver_test.go +++ b/libs/dyn/yamlsaver/saver_test.go @@ -2,10 +2,10 @@ package yamlsaver import ( "testing" - "time" "github.com/databricks/cli/libs/dyn" assert "github.com/databricks/cli/libs/dyn/dynassert" + "github.com/stretchr/testify/require" "gopkg.in/yaml.v3" ) @@ -45,11 +45,14 @@ func TestMarshalBoolValue(t *testing.T) { } func TestMarshalTimeValue(t *testing.T) { + tm, err := dyn.NewTime("1970-01-01") + require.NoError(t, err) + s := NewSaver() - var timeValue = dyn.V(time.Unix(0, 0)) + var timeValue = dyn.V(tm) v, err := s.toYamlNode(timeValue) assert.NoError(t, err) - assert.Equal(t, "1970-01-01 00:00:00 +0000 UTC", v.Value) + assert.Equal(t, "1970-01-01", v.Value) assert.Equal(t, yaml.ScalarNode, v.Kind) } From 5fac7edcdf5f3e371920c2907a311cd1564bebf8 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 29 Aug 2024 16:41:12 +0200 Subject: [PATCH 07/22] Pass along $AZURE_CONFIG_FILE to Terraform process (#1734) ## Changes This ensures that the CLI and Terraform can both use an Azure CLI session configured under a non-standard path. This is the default behavior on Azure DevOps when using the AzureCLI@2 task. Fixes #1722. ## Tests Unit test. --- bundle/deploy/terraform/init.go | 7 +++++++ bundle/deploy/terraform/init_test.go | 19 ++++++++++--------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/bundle/deploy/terraform/init.go b/bundle/deploy/terraform/init.go index e7f720d088..7d75ee8a81 100644 --- a/bundle/deploy/terraform/init.go +++ b/bundle/deploy/terraform/init.go @@ -111,6 +111,13 @@ func inheritEnvVars(ctx context.Context, environ map[string]string) error { environ["PATH"] = path } + // Include $AZURE_CONFIG_FILE in set of environment variables to pass along. + // This is set in Azure DevOps by the AzureCLI@2 task. + azureConfigFile, ok := env.Lookup(ctx, "AZURE_CONFIG_FILE") + if ok { + environ["AZURE_CONFIG_FILE"] = azureConfigFile + } + // Include $TF_CLI_CONFIG_FILE to override terraform provider in development. 
// See: https://developer.hashicorp.com/terraform/cli/config/config-file#explicit-installation-method-configuration devConfigFile, ok := env.Lookup(ctx, "TF_CLI_CONFIG_FILE") diff --git a/bundle/deploy/terraform/init_test.go b/bundle/deploy/terraform/init_test.go index 94e47dbc11..450e7eb6aa 100644 --- a/bundle/deploy/terraform/init_test.go +++ b/bundle/deploy/terraform/init_test.go @@ -269,19 +269,20 @@ func TestSetUserAgentExtraEnvVar(t *testing.T) { } func TestInheritEnvVars(t *testing.T) { - env := map[string]string{} - t.Setenv("HOME", "/home/testuser") t.Setenv("PATH", "/foo:/bar") t.Setenv("TF_CLI_CONFIG_FILE", "/tmp/config.tfrc") + t.Setenv("AZURE_CONFIG_FILE", "/tmp/foo/bar") - err := inheritEnvVars(context.Background(), env) - - require.NoError(t, err) - - require.Equal(t, env["HOME"], "/home/testuser") - require.Equal(t, env["PATH"], "/foo:/bar") - require.Equal(t, env["TF_CLI_CONFIG_FILE"], "/tmp/config.tfrc") + ctx := context.Background() + env := map[string]string{} + err := inheritEnvVars(ctx, env) + if assert.NoError(t, err) { + assert.Equal(t, "/home/testuser", env["HOME"]) + assert.Equal(t, "/foo:/bar", env["PATH"]) + assert.Equal(t, "/tmp/config.tfrc", env["TF_CLI_CONFIG_FILE"]) + assert.Equal(t, "/tmp/foo/bar", env["AZURE_CONFIG_FILE"]) + } } func TestSetUserProfileFromInheritEnvVars(t *testing.T) { From 2e000f1ebd0f22933bb2fd64ee7a9f44cb53b5d8 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Thu, 29 Aug 2024 21:07:21 +0200 Subject: [PATCH 08/22] Use materialized views in the default-sql template (#1709) ## Changes Materialized views now support `CREATE OR REPLACE` ([docs](https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-materialized-view.html))! This makes it possible to use them with Workflows in DABs.This PR updates the template to use a materialized view rather than a regular view. ## Tests Manually validated in production. --- .../template/{{.project_name}}/src/orders_daily.sql.tmpl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl index 8a9d12ea8f..e5ceb77a9c 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl @@ -1,10 +1,9 @@ -- This query is executed using Databricks Workflows (see resources/{{.project_name}}_sql_job.yml) -{{- /* We can't use a materialized view here since they don't support 'create or refresh' yet.*/}} USE CATALOG {{"{{"}}catalog{{"}}"}}; USE IDENTIFIER({{"{{"}}schema{{"}}"}}); -CREATE OR REPLACE VIEW +CREATE OR REPLACE MATERIALIZED VIEW orders_daily AS SELECT order_date, count(*) AS number_of_orders From ed4a4585c07602e84a8ce97336688463d9833527 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Fri, 30 Aug 2024 09:32:10 +0200 Subject: [PATCH 09/22] Update templates to latest LTS DBR (#1715) ## Changes This updates the templates to use the latest DBR 15 LTS version. 
- [x] DB Connect 15.4 must be released + validated before this can go out --- libs/template/templates/dbt-sql/library/versions.tmpl | 4 ++-- libs/template/templates/default-python/library/versions.tmpl | 4 ++-- libs/template/templates/default-sql/library/versions.tmpl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libs/template/templates/dbt-sql/library/versions.tmpl b/libs/template/templates/dbt-sql/library/versions.tmpl index f9a879d25a..7d0c88e7df 100644 --- a/libs/template/templates/dbt-sql/library/versions.tmpl +++ b/libs/template/templates/dbt-sql/library/versions.tmpl @@ -1,7 +1,7 @@ {{define "latest_lts_dbr_version" -}} - 13.3.x-scala2.12 + 15.4.x-scala2.12 {{- end}} {{define "latest_lts_db_connect_version_spec" -}} - >=13.3,<13.4 + >=15.4,<15.5 {{- end}} diff --git a/libs/template/templates/default-python/library/versions.tmpl b/libs/template/templates/default-python/library/versions.tmpl index f9a879d25a..7d0c88e7df 100644 --- a/libs/template/templates/default-python/library/versions.tmpl +++ b/libs/template/templates/default-python/library/versions.tmpl @@ -1,7 +1,7 @@ {{define "latest_lts_dbr_version" -}} - 13.3.x-scala2.12 + 15.4.x-scala2.12 {{- end}} {{define "latest_lts_db_connect_version_spec" -}} - >=13.3,<13.4 + >=15.4,<15.5 {{- end}} diff --git a/libs/template/templates/default-sql/library/versions.tmpl b/libs/template/templates/default-sql/library/versions.tmpl index f9a879d25a..7d0c88e7df 100644 --- a/libs/template/templates/default-sql/library/versions.tmpl +++ b/libs/template/templates/default-sql/library/versions.tmpl @@ -1,7 +1,7 @@ {{define "latest_lts_dbr_version" -}} - 13.3.x-scala2.12 + 15.4.x-scala2.12 {{- end}} {{define "latest_lts_db_connect_version_spec" -}} - >=13.3,<13.4 + >=15.4,<15.5 {{- end}} From 70ce80251853ae6443ea280ba6b2bc4614c7267a Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Fri, 30 Aug 2024 15:29:00 +0200 Subject: [PATCH 10/22] PythonMutator: preserve normalize diagnostics (#1735) ## Changes Preserve diagnostics if there are any errors or warnings when PythonMutator normalizes output. If anything goes wrong during conversion, diagnostics contain the relevant location and path. 
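The escalation itself is small; as a hedged sketch (with a simplified diagnostics type, not the exact `libs/diag` API used by the CLI), the idea is to promote normalization warnings to errors, since PyDABs output is expected to be fully normalized already:

```go
package main

import "fmt"

type severity int

const (
	warning severity = iota
	errorSeverity
)

type diagnostic struct {
	severity severity
	summary  string
	path     string // location info is preserved, not discarded
}

// strictDiagnostics promotes warnings to errors: a warning at this stage
// means the generated output was not fully normalized, which is a bug.
func strictDiagnostics(diags []diagnostic) []diagnostic {
	out := make([]diagnostic, 0, len(diags))
	for _, d := range diags {
		if d.severity == warning {
			d.severity = errorSeverity
		}
		out = append(out, d)
	}
	return out
}

func main() {
	diags := strictDiagnostics([]diagnostic{{warning, "unknown field: x", "resources.jobs.a"}})
	fmt.Println(diags[0].severity == errorSeverity) // true
}
```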
## Tests Unit tests --- .../config/mutator/python/python_mutator.go | 35 +++++++++++-------- .../mutator/python/python_mutator_test.go | 20 ++++++++++- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index 4f44df0a99..bf30f07048 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -205,10 +205,8 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr) } - output, err := loadOutputFile(rootPath, outputPath) - if err != nil { - return dyn.InvalidValue, diag.Errorf("failed to load Python mutator output: %s", err) - } + output, outputDiags := loadOutputFile(rootPath, outputPath) + pythonDiagnostics = pythonDiagnostics.Extend(outputDiags) // we pass through pythonDiagnostic because it contains warnings return output, pythonDiagnostics @@ -225,10 +223,10 @@ func writeInputFile(inputPath string, input dyn.Value) error { return os.WriteFile(inputPath, rootConfigJson, 0600) } -func loadOutputFile(rootPath string, outputPath string) (dyn.Value, error) { +func loadOutputFile(rootPath string, outputPath string) (dyn.Value, diag.Diagnostics) { outputFile, err := os.Open(outputPath) if err != nil { - return dyn.InvalidValue, fmt.Errorf("failed to open output file: %w", err) + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to open output file: %w", err)) } defer outputFile.Close() @@ -243,27 +241,34 @@ func loadOutputFile(rootPath string, outputPath string) (dyn.Value, error) { // for that, we pass virtualPath instead of outputPath as file location virtualPath, err := filepath.Abs(filepath.Join(rootPath, "__generated_by_pydabs__.yml")) if err != nil { - return dyn.InvalidValue, fmt.Errorf("failed to get absolute path: %w", err) + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to get absolute path: %w", err)) } generated, err := yamlloader.LoadYAML(virtualPath, outputFile) if err != nil { - return dyn.InvalidValue, fmt.Errorf("failed to parse output file: %w", err) + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse output file: %w", err)) } - normalized, diagnostic := convert.Normalize(config.Root{}, generated) - if diagnostic.Error() != nil { - return dyn.InvalidValue, fmt.Errorf("failed to normalize output: %w", diagnostic.Error()) - } + return strictNormalize(config.Root{}, generated) +} + +func strictNormalize(dst any, generated dyn.Value) (dyn.Value, diag.Diagnostics) { + normalized, diags := convert.Normalize(dst, generated) // warnings shouldn't happen because output should be already normalized // when it happens, it's a bug in the mutator, and should be treated as an error - for _, d := range diagnostic.Filter(diag.Warning) { - return dyn.InvalidValue, fmt.Errorf("failed to normalize output: %s", d.Summary) + strictDiags := diag.Diagnostics{} + + for _, d := range diags { + if d.Severity == diag.Warning { + d.Severity = diag.Error + } + + strictDiags = strictDiags.Append(d) } - return normalized, nil + return normalized, strictDiags } // loadDiagnosticsFile loads diagnostics from a file. 
diff --git a/bundle/config/mutator/python/python_mutator_test.go b/bundle/config/mutator/python/python_mutator_test.go index ea02d1cede..26350d886b 100644 --- a/bundle/config/mutator/python/python_mutator_test.go +++ b/bundle/config/mutator/python/python_mutator_test.go @@ -10,6 +10,8 @@ import ( "runtime" "testing" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/cli/libs/dyn/merge" "github.com/databricks/cli/bundle/env" @@ -255,7 +257,7 @@ func TestPythonMutator_badOutput(t *testing.T) { mutator := PythonMutator(PythonMutatorPhaseLoad) diag := bundle.Apply(ctx, b, mutator) - assert.EqualError(t, diag.Error(), "failed to load Python mutator output: failed to normalize output: unknown field: unknown_property") + assert.EqualError(t, diag.Error(), "unknown field: unknown_property") } func TestPythonMutator_disabled(t *testing.T) { @@ -546,6 +548,22 @@ func TestInterpreterPath(t *testing.T) { } } +func TestStrictNormalize(t *testing.T) { + // NB: there is no way to trigger diag.Error, so we don't test it + + type TestStruct struct { + A int `json:"a"` + } + + value := dyn.NewValue(map[string]dyn.Value{"A": dyn.NewValue("abc", nil)}, nil) + + _, diags := convert.Normalize(TestStruct{}, value) + _, strictDiags := strictNormalize(TestStruct{}, value) + + assert.False(t, diags.HasError()) + assert.True(t, strictDiags.HasError()) +} + func withProcessStub(t *testing.T, args []string, output string, diagnostics string) context.Context { ctx := context.Background() ctx, stub := process.WithStub(ctx) From 5d9910c8e0f3119276b6ee4afd534ac82d4fb6c2 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Mon, 2 Sep 2024 14:09:08 +0530 Subject: [PATCH 11/22] Make lock optional in the JSON schema (#1738) Fixes https://github.com/databricks/cli/issues/1561 --- bundle/config/deployment.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle/config/deployment.go b/bundle/config/deployment.go index 7f0f57a8c3..b7efb4456f 100644 --- a/bundle/config/deployment.go +++ b/bundle/config/deployment.go @@ -6,5 +6,5 @@ type Deployment struct { FailOnActiveRuns bool `json:"fail_on_active_runs,omitempty"` // Lock configures locking behavior on deployment. 
- Lock Lock `json:"lock"` + Lock Lock `json:"lock,omitempty"` } From 582558cac22017c2265b384c5380179975dc6f3d Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 2 Sep 2024 11:17:18 +0200 Subject: [PATCH 12/22] Do not suppress normalisation diagnostics for resolving variables (#1740) ## Changes Tested on the following bundle configuration ``` bundle: name: clusters mode: development variables: webhook_notifications: description: Webhook URL for notifications type: complex default: on_failure: id: 6a6c04c1-389c-4534-95af-b68b62a9dbe6 resources: jobs: test_job: name: "Andrew Nester Test Job" tasks: - task_key: test_task notebook_task: notebook_path: "./src/test.py" new_cluster: num_workers: 2 node_type_id: "i3.xlarge" autoscale: min_workers: 2 max_workers: 7 spark_version: "12.2.x-scala2.12" spark_conf: "spark.executor.memory": "2g" webhook_notifications: ${var.webhook_notifications} ``` bundle validate output is below ``` andrew.nester@HFW9Y94129 wheel % databricks bundle validate Warning: expected sequence, found map at resources.jobs.test_job.webhook_notifications.on_failure in bundle.yml:11:9 Name: clusters Target: default Workspace: User: andrew.nester@databricks.com Path: /Users/andrew.nester@databricks.com/.bundle/clusters/default ``` **Note** that error correctly points to the variable --- .../config/mutator/resolve_variable_references.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 61940be567..5e5b761090 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -10,7 +10,6 @@ import ( "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/dyn/convert" "github.com/databricks/cli/libs/dyn/dynvar" - "github.com/databricks/cli/libs/log" ) type resolveVariableReferences struct { @@ -124,6 +123,7 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) // We rewrite it here to make the resolution logic simpler. varPath := dyn.NewPath(dyn.Key("var")) + var diags diag.Diagnostics err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { // Synthesize a copy of the root that has all fields that are present in the type // but not set in the dynamic value set to their corresponding empty value. @@ -180,14 +180,13 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) // Normalize the result because variable resolution may have been applied to non-string fields. // For example, a variable reference may have been resolved to a integer. - root, diags := convert.Normalize(b.Config, root) - for _, diag := range diags { - // This occurs when a variable's resolved value is incompatible with the field's type. - // Log a warning until we have a better way to surface these diagnostics to the user. 
- log.Warnf(ctx, "normalization diagnostic: %s", diag.Summary) - } + root, normaliseDiags := convert.Normalize(b.Config, root) + diags = diags.Extend(normaliseDiags) return root, nil }) - return diag.FromErr(err) + if err != nil { + diags = diags.Extend(diag.FromErr(err)) + } + return diags } From ed448815b4e9881494d3ef561ec84ef694ea4b11 Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Mon, 2 Sep 2024 11:49:30 +0200 Subject: [PATCH 13/22] PythonMutator: explain missing package error (#1736) ## Changes Explain the error when the `databricks-pydabs` package is not installed or the Python environment isn't correctly activated. Example output: ``` Error: python mutator process failed: ".venv/bin/python3 -m databricks.bundles.build --phase load --input .../input.json --output .../output.json --diagnostics .../diagnostics.json: exit status 1", use --debug to enable logging .../.venv/bin/python3: Error while finding module specification for 'databricks.bundles.build' (ModuleNotFoundError: No module named 'databricks') Explanation: 'databricks-pydabs' library is not installed in the Python environment. If using Python wheels, ensure that 'databricks-pydabs' is included in the dependencies, and that the wheel is installed in the Python environment: $ .venv/bin/pip install -e . If using a virtual environment, ensure it is specified as the venv_path property in databricks.yml, or activate the environment before running CLI commands: experimental: pydabs: venv_path: .venv ``` ## Tests Unit tests --- .../config/mutator/python/python_mutator.go | 49 +++++++++++++++++-- .../mutator/python/python_mutator_test.go | 24 +++++++++ 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index bf30f07048..fbf3b7e0be 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -1,15 +1,21 @@ package python import ( + "bytes" "context" "encoding/json" "errors" "fmt" + "io" "os" "path/filepath" - "github.com/databricks/cli/libs/python" "github.com/databricks/databricks-sdk-go/logger" + "github.com/fatih/color" + + "strings" + + "github.com/databricks/cli/libs/python" "github.com/databricks/cli/bundle/env" @@ -169,7 +175,11 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err) } - stderrWriter := newLogWriter(ctx, "stderr: ") + stderrBuf := bytes.Buffer{} + stderrWriter := io.MultiWriter( + newLogWriter(ctx, "stderr: "), + &stderrBuf, + ) stdoutWriter := newLogWriter(ctx, "stdout: ") _, processErr := process.Background( @@ -197,7 +207,13 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r // process can fail without reporting errors in diagnostics file or creating it, for instance, // venv doesn't have PyDABs library installed if processErr != nil { - return dyn.InvalidValue, diag.Errorf("python mutator process failed: %sw, use --debug to enable logging", processErr) + diagnostic := diag.Diagnostic{ + Severity: diag.Error, + Summary: fmt.Sprintf("python mutator process failed: %q, use --debug to enable logging", processErr), + Detail: explainProcessErr(stderrBuf.String()), + } + + return dyn.InvalidValue, diag.Diagnostics{diagnostic} } // or we can fail to read diagnostics file, that should always be created @@ -212,6 +228,33 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r return output, 
pythonDiagnostics } +const installExplanation = `If using Python wheels, ensure that 'databricks-pydabs' is included in the dependencies, +and that the wheel is installed in the Python environment: + + $ .venv/bin/pip install -e . + +If using a virtual environment, ensure it is specified as the venv_path property in databricks.yml, +or activate the environment before running CLI commands: + + experimental: + pydabs: + venv_path: .venv +` + +// explainProcessErr provides additional explanation for common errors. +// It's meant to be the best effort, and not all errors are covered. +// Output should be used only used for error reporting. +func explainProcessErr(stderr string) string { + // implemented in cpython/Lib/runpy.py and portable across Python 3.x, including pypy + if strings.Contains(stderr, "Error while finding module specification for 'databricks.bundles.build'") { + summary := color.CyanString("Explanation: ") + "'databricks-pydabs' library is not installed in the Python environment.\n" + + return stderr + "\n" + summary + "\n" + installExplanation + } + + return stderr +} + func writeInputFile(inputPath string, input dyn.Value) error { // we need to marshal dyn.Value instead of bundle.Config to JSON to support // non-string fields assigned with bundle variables diff --git a/bundle/config/mutator/python/python_mutator_test.go b/bundle/config/mutator/python/python_mutator_test.go index 26350d886b..bf12b24997 100644 --- a/bundle/config/mutator/python/python_mutator_test.go +++ b/bundle/config/mutator/python/python_mutator_test.go @@ -564,6 +564,30 @@ func TestStrictNormalize(t *testing.T) { assert.True(t, strictDiags.HasError()) } +func TestExplainProcessErr(t *testing.T) { + stderr := "/home/test/.venv/bin/python3: Error while finding module specification for 'databricks.bundles.build' (ModuleNotFoundError: No module named 'databricks')\n" + expected := `/home/test/.venv/bin/python3: Error while finding module specification for 'databricks.bundles.build' (ModuleNotFoundError: No module named 'databricks') + +Explanation: 'databricks-pydabs' library is not installed in the Python environment. + +If using Python wheels, ensure that 'databricks-pydabs' is included in the dependencies, +and that the wheel is installed in the Python environment: + + $ .venv/bin/pip install -e . + +If using a virtual environment, ensure it is specified as the venv_path property in databricks.yml, +or activate the environment before running CLI commands: + + experimental: + pydabs: + venv_path: .venv +` + + out := explainProcessErr(stderr) + + assert.Equal(t, expected, out) +} + func withProcessStub(t *testing.T, args []string, output string, diagnostics string) context.Context { ctx := context.Background() ctx, stub := process.WithStub(ctx) From 0ce7be8ff07dcf1f57d41b620b209f73e20df118 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 14:39:16 +0200 Subject: [PATCH 14/22] Bump github.com/Masterminds/semver/v3 from 3.2.1 to 3.3.0 (#1741) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [github.com/Masterminds/semver/v3](https://github.com/Masterminds/semver) from 3.2.1 to 3.3.0.
Release notes (sourced from github.com/Masterminds/semver/v3's releases):

v3.3.0
Full Changelog: https://github.com/Masterminds/semver/compare/v3.2.1...v3.3.0

Changelog (sourced from github.com/Masterminds/semver/v3's changelog):

3.3.0 (2024-08-27)

Changed:
* #241: Simplify StrictNewVersion parsing (thanks @grosser)
* Testing support up through Go 1.23
* Minimum version set to 1.21 as this is what's tested now
* Fuzz testing now supports caching

Commits:
* e6e3d4d Merge pull request #249 from mattfarina/update-changelog-3.3.0
* e80c4ea Updating changelog for 3.3.0
* 80427ad Merge pull request #248 from mattfarina/bump-min-version
* b610837 bumping min version in go.mod based on what's tested
* a4cccd8 Merge pull request #246 from mattfarina/bump-go-1.23
* 7c178cf Updating the testing version of Go used
* 29f94c1 Merge pull request #241 from grosser/grosser/validate
* 2cf1b16 Merge pull request #245 from mattfarina/remove-vert
* b55476a Removing reference to vert
* d07450b simplify StrictNewVersion
* Additional commits viewable in the compare view: https://github.com/Masterminds/semver/compare/v3.2.1...v3.3.0
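For reference, a hedged usage sketch of the bumped library (illustrative only, not part of this change; `StrictNewVersion` is the function whose parsing was simplified in #241):

```go
package main

import (
	"fmt"

	"github.com/Masterminds/semver/v3"
)

func main() {
	// StrictNewVersion requires a full three-part version without a leading "v".
	v, err := semver.StrictNewVersion("3.3.0")
	if err != nil {
		panic(err)
	}

	// Constraint checks behave the same before and after the bump.
	c, err := semver.NewConstraint(">=3.2.1, <4.0.0")
	if err != nil {
		panic(err)
	}
	fmt.Println(v, c.Check(v)) // 3.3.0 true
}
```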

Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 4aa279921d..9777106c2a 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/databricks/cli go 1.22 require ( - github.com/Masterminds/semver/v3 v3.2.1 // MIT + github.com/Masterminds/semver/v3 v3.3.0 // MIT github.com/briandowns/spinner v1.23.1 // Apache 2.0 github.com/databricks/databricks-sdk-go v0.45.0 // Apache 2.0 github.com/fatih/color v1.17.0 // MIT diff --git a/go.sum b/go.sum index 2e58948aad..b232e8e437 100644 --- a/go.sum +++ b/go.sum @@ -8,8 +8,8 @@ cloud.google.com/go/compute/metadata v0.3.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1h dario.cat/mergo v1.0.0 h1:AGCNq9Evsj31mOgNPcLyXc+4PNABt905YmuqPYYpBWk= dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0= -github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= +github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0= +github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= github.com/ProtonMail/go-crypto v1.1.0-alpha.2 h1:bkyFVUP+ROOARdgCiJzNQo2V2kiB97LyUpzH9P6Hrlg= From 096123674afb6c593f2c242c3210fa5735ed98cb Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Mon, 2 Sep 2024 19:13:17 +0530 Subject: [PATCH 15/22] Fix streaming of stdout, stdin, stderr in cobra test runner (#1742) ## Changes We were not using the readers and writers set in the test fixtures in the progress logger. This PR fixes that. It also modifies `TestAccAbortBind`, which was implicitly relying on the bug. I encountered this bug while working on https://github.com/databricks/cli/pull/1672. ## Tests Manually. From non-tty: ``` Error: failed to bind the resource, err: This bind operation requires user confirmation, but the current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed. ``` From tty, bind works as expected. ``` Confirm import changes? Changes will be remotely applied only after running 'bundle deploy'. [y/n]: y Updating deployment state... Successfully bound databricks_pipeline with an id '9d2dedbb-f522-4503-96ba-4bc4d5bfa77d'. 
Run 'bundle deploy' to deploy changes to your workspace ``` --- bundle/deploy/terraform/import.go | 5 +++++ cmd/root/progress_logger.go | 6 ++++++ internal/bundle/bind_resource_test.go | 8 ++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/bundle/deploy/terraform/import.go b/bundle/deploy/terraform/import.go index 7c1a681583..dfe60a5814 100644 --- a/bundle/deploy/terraform/import.go +++ b/bundle/deploy/terraform/import.go @@ -69,6 +69,11 @@ func (m *importResource) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn // Remove output starting from Warning until end of output output = output[:bytes.Index([]byte(output), []byte("Warning:"))] cmdio.LogString(ctx, output) + + if !cmdio.IsPromptSupported(ctx) { + return diag.Errorf("This bind operation requires user confirmation, but the current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed.") + } + ans, err := cmdio.AskYesOrNo(ctx, "Confirm import changes? Changes will be remotely applied only after running 'bundle deploy'.") if err != nil { return diag.FromErr(err) diff --git a/cmd/root/progress_logger.go b/cmd/root/progress_logger.go index c05ecb0437..7d6a1fa46d 100644 --- a/cmd/root/progress_logger.go +++ b/cmd/root/progress_logger.go @@ -29,6 +29,12 @@ func (f *progressLoggerFlag) resolveModeDefault(format flags.ProgressLogFormat) } func (f *progressLoggerFlag) initializeContext(ctx context.Context) (context.Context, error) { + // No need to initialize the logger if it's already set in the context. This + // happens in unit tests where the logger is set up as a fixture. + if _, ok := cmdio.FromContext(ctx); ok { + return ctx, nil + } + if f.log.level.String() != "disabled" && f.log.file.String() == "stderr" && f.ProgressLogFormat == flags.ModeInplace { return nil, fmt.Errorf("inplace progress logging cannot be used when log-file is stderr") diff --git a/internal/bundle/bind_resource_test.go b/internal/bundle/bind_resource_test.go index d44ad2c316..2449c31f2c 100644 --- a/internal/bundle/bind_resource_test.go +++ b/internal/bundle/bind_resource_test.go @@ -11,6 +11,7 @@ import ( "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/google/uuid" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -101,12 +102,15 @@ func TestAccAbortBind(t *testing.T) { destroyBundle(t, ctx, bundleRoot) }) + // Bind should fail because prompting is not possible. t.Setenv("BUNDLE_ROOT", bundleRoot) + t.Setenv("TERM", "dumb") c := internal.NewCobraTestRunner(t, "bundle", "deployment", "bind", "foo", fmt.Sprint(jobId)) - // Simulate user aborting the bind. This is done by not providing any input to the prompt in non-interactive mode. + // Expect error suggesting to use --auto-approve _, _, err = c.Run() - require.ErrorContains(t, err, "failed to bind the resource") + assert.ErrorContains(t, err, "failed to bind the resource") + assert.ErrorContains(t, err, "This bind operation requires user confirmation, but the current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed") err = deployBundle(t, ctx, bundleRoot) require.NoError(t, err) From 072fa812e28c32d374afdb5409e7320f1c5894c7 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Tue, 3 Sep 2024 09:51:54 +0200 Subject: [PATCH 16/22] Include a permissions section in all templates (#1713) ## Changes This updates the templates to include a `permissions` section.
Having a permissions section is a best practice: it helps users understand the notion of permissions and makes it easier to diagnose permission errors (https://github.com/databricks/cli/pull/1386). This is a cherry-pick from https://github.com/databricks/cli/pull/1387. This change was verified to work both in dev and prod. Existing unit tests validate the validity of the templates in these modes. --- .../{{.project_name}}/databricks.yml.tmpl | 17 ++++---- .../{{.project_name}}/databricks.yml.tmpl | 36 ++++------------- .../{{.project_name}}/databricks.yml.tmpl | 39 ++++++------------- 3 files changed, 28 insertions(+), 64 deletions(-) diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl index fdda03c0d0..f96ce4fe68 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -12,8 +12,10 @@ include: targets: dev: default: true - # We use 'mode: development' to indicate this is a personal development copy. - # Any job schedules and triggers are paused by default. + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. mode: development workspace: host: {{workspace_host}} @@ -22,11 +24,10 @@ targets: mode: production workspace: host: {{workspace_host}} - # We always use /Users/{{user_name}} for all resources to make sure we only have a single copy. + # We explicitly specify /Users/{{user_name}} to make sure we only have a single copy. root_path: /Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} - {{- if not is_service_principal}} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE run_as: - # This runs as {{user_name}} in production. We could also use a service principal here - # using service_principal_name (see the Databricks documentation). - user_name: {{user_name}} - {{- end}} + {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl index e3572326b6..8544dc8348 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl @@ -7,44 +7,24 @@ include: - resources/*.yml targets: - # The 'dev' target, for development purposes. This target is the default. dev: - # We use 'mode: development' to indicate this is a personal development copy: + # The default target uses 'mode: development' to create a development copy. # - Deployed resources get prefixed with '[dev my_user_name]' - # - Any job schedules and triggers are paused by default - # - The 'development' mode is used for Delta Live Tables pipelines + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. mode: development default: true workspace: host: {{workspace_host}} - ## Optionally, there could be a 'staging' target here.
- ## (See Databricks docs on CI/CD at https://docs.databricks.com/dev-tools/bundles/ci-cd.html.) - # - # staging: - # workspace: - # host: {{workspace_host}} - - # The 'prod' target, used for production deployment. prod: - # We use 'mode: production' to indicate this is a production deployment. - # Doing so enables strict verification of the settings below. mode: production workspace: host: {{workspace_host}} - # We always use /Users/{{user_name}} for all resources to make sure we only have a single copy. - {{- /* - Internal note 2023-12: CLI versions v0.211.0 and before would show an error when using `mode: production` - with a path that doesn't say "/Shared". For now, we'll include an extra comment in the template - to explain that customers should update if they see this. - */}} - # If this path results in an error, please make sure you have a recent version of the CLI installed. + # We explicitly specify /Users/{{user_name}} to make sure we only have a single copy. root_path: /Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE run_as: - {{- if is_service_principal}} - service_principal_name: {{user_name}} - {{- else}} - # This runs as {{user_name}} in production. We could also use a service principal here, - # see https://docs.databricks.com/dev-tools/bundles/permissions.html. - user_name: {{user_name}} - {{- end}} + {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl index a47fb7c199..55c1aae4a9 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -18,16 +18,16 @@ variables: {{- $dev_schema := .shared_schema }} {{- $prod_schema := .shared_schema }} {{- if (regexp "^yes").MatchString .personal_schemas}} -{{- $dev_schema = "${workspace.current_user.short_name}"}} -{{- $prod_schema = "default"}} + {{- $dev_schema = "${workspace.current_user.short_name}"}} + {{- $prod_schema = "default"}} {{- end}} -# Deployment targets. targets: - # The 'dev' target, for development purposes. This target is the default. dev: - # We use 'mode: development' to indicate this is a personal development copy. - # Any job schedules and triggers are paused by default + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. mode: development default: true workspace: @@ -37,35 +37,18 @@ targets: catalog: {{.default_catalog}} schema: {{$dev_schema}} - ## Optionally, there could be a 'staging' target here. - ## (See Databricks docs on CI/CD at https://docs.databricks.com/dev-tools/bundles/ci-cd.html.) - # - # staging: - # workspace: - # host: {{workspace_host}} - - # The 'prod' target, used for production deployment. prod: - # We use 'mode: production' to indicate this is a production deployment. - # Doing so enables strict verification of the settings below. mode: production workspace: host: {{workspace_host}} - # We always use /Users/{{user_name}} for all resources to make sure we only have a single copy. 
- {{- /* Internal note 2023-12: CLI versions v0.211.0 and before would show an error when using `mode: production` with a path that doesn't say "/Shared". For now, we'll include an extra comment in the template to explain that customers should update if they see this. */}} - # If this path results in an error, please make sure you have a recent version of the CLI installed. + # We explicitly specify /Users/{{user_name}} to make sure we only have a single copy. root_path: /Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} variables: warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}} catalog: {{.default_catalog}} schema: {{$prod_schema}} - {{- if not is_service_principal}} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE run_as: - # This runs as {{user_name}} in production. We could also use a service principal here - # using service_principal_name (see https://docs.databricks.com/en/dev-tools/bundles/permissions.html). - user_name: {{user_name}} - {{end -}} + {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} From 35cdf4010dad156902d8d8b20b5fc0fe01de6acc Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Wed, 4 Sep 2024 12:44:21 +0530 Subject: [PATCH 17/22] Do not error if we cannot prompt for a profile in `auth login` (#1745) ## Changes With https://github.com/databricks/cli/pull/1370 we started to error if a profile name was not provided in a non-tty setting. The Databricks VSCode extension, however, uses the `auth login` command to simply refresh the tokens. Thus, this PR is a regression fix for that use case. ## Tests Manually, `databricks auth login --host https://e2-dogfood.staging.cloud.databricks.com` no longer errors. Instead it successfully refreshes the credentials. --- cmd/auth/login.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/auth/login.go b/cmd/auth/login.go index f87a2a0277..79b7954680 100644 --- a/cmd/auth/login.go +++ b/cmd/auth/login.go @@ -19,7 +19,7 @@ import ( func promptForProfile(ctx context.Context, defaultValue string) (string, error) { if !cmdio.IsInTTY(ctx) { - return "", fmt.Errorf("the command is being run in a non-interactive environment, please specify a profile using --profile") + return "", nil } prompt := cmdio.Prompt(ctx) From a27c24a397f91048677c38e48ee31213f278f640 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Wed, 4 Sep 2024 16:41:47 +0530 Subject: [PATCH 18/22] Add prompt when a pipeline recreation happens (#1672) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Changes DLT pipeline recreations are destructive. They can lead to lost history of previous updates, temporary outages of the underlying tables, and are potentially computationally expensive. Thus, we make a breaking change where a prompt is shown to the user if their configuration changes will lead to a DLT pipeline recreation. Users can skip the prompt by specifying the `--auto-approve` flag. This PR also fixes an issue with our test runner where logs from the cmdio.Logger would not get propagated to the reader returned by our cobra test runner. ## Tests Manually, and new unit and integration tests. ``` ➜ bundle-playground-3 cli bundle deploy Uploading bundle files to /Users/63ec021d-b0c6-49c0-93a0-5123953a1cb2/.bundle/test/development/files...
The following DLT pipelines will be recreated. Underlying tables will be unavailable for a transient period until the newly recreated pipelines are run once successfully. History of previous pipeline update runs will be lost because of recreation: recreate pipeline foo Would you like to proceed? [y/n]: n Deployment cancelled! ``` --- bundle/phases/deploy.go | 92 +++++++++++++------ bundle/phases/deploy_test.go | 67 ++++++++++++++ .../databricks_template_schema.json | 8 ++ .../template/databricks.yml.tmpl | 25 +++++ .../bundles/recreate_pipeline/template/nb.sql | 2 + internal/bundle/deploy_test.go | 91 +++++++++++++++++- 6 files changed, 258 insertions(+), 27 deletions(-) create mode 100644 bundle/phases/deploy_test.go create mode 100644 internal/bundle/bundles/recreate_pipeline/databricks_template_schema.json create mode 100644 internal/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl create mode 100644 internal/bundle/bundles/recreate_pipeline/template/nb.sql diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index ca967c321a..49544227ec 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -19,55 +19,95 @@ import ( "github.com/databricks/cli/bundle/scripts" "github.com/databricks/cli/libs/cmdio" terraformlib "github.com/databricks/cli/libs/terraform" + tfjson "github.com/hashicorp/terraform-json" ) -func approvalForUcSchemaDelete(ctx context.Context, b *bundle.Bundle) (bool, error) { - tf := b.Terraform - if tf == nil { - return false, fmt.Errorf("terraform not initialized") - } - - // read plan file - plan, err := tf.ShowPlanFile(ctx, b.Plan.Path) - if err != nil { - return false, err - } - - actions := make([]terraformlib.Action, 0) - for _, rc := range plan.ResourceChanges { - // We only care about destructive actions on UC schema resources. - if rc.Type != "databricks_schema" { +func parseTerraformActions(changes []*tfjson.ResourceChange, toInclude func(typ string, actions tfjson.Actions) bool) []terraformlib.Action { + res := make([]terraformlib.Action, 0) + for _, rc := range changes { + if !toInclude(rc.Type, rc.Change.Actions) { continue } var actionType terraformlib.ActionType - switch { case rc.Change.Actions.Delete(): actionType = terraformlib.ActionTypeDelete case rc.Change.Actions.Replace(): actionType = terraformlib.ActionTypeRecreate default: - // We don't need a prompt for non-destructive actions like creating - // or updating a schema. + // No use case for other action types yet. continue } - actions = append(actions, terraformlib.Action{ + res = append(res, terraformlib.Action{ Action: actionType, ResourceType: rc.Type, ResourceName: rc.Name, }) } - // No restricted actions planned. No need for approval. - if len(actions) == 0 { + return res +} + +func approvalForDeploy(ctx context.Context, b *bundle.Bundle) (bool, error) { + tf := b.Terraform + if tf == nil { + return false, fmt.Errorf("terraform not initialized") + } + + // read plan file + plan, err := tf.ShowPlanFile(ctx, b.Plan.Path) + if err != nil { + return false, err + } + + schemaActions := parseTerraformActions(plan.ResourceChanges, func(typ string, actions tfjson.Actions) bool { + // Filter in only UC schema resources. + if typ != "databricks_schema" { + return false + } + + // We only display prompts for destructive actions like deleting or + // recreating a schema. 
+ return actions.Delete() || actions.Replace() + }) + + dltActions := parseTerraformActions(plan.ResourceChanges, func(typ string, actions tfjson.Actions) bool { + // Filter in only DLT pipeline resources. + if typ != "databricks_pipeline" { + return false + } + + // Recreating a DLT pipeline leads to metadata loss and for a transient period + // the underlying tables will be unavailable. + return actions.Replace() || actions.Delete() + }) + + // We don't need to display any prompts in this case. + if len(dltActions) == 0 && len(schemaActions) == 0 { return true, nil } - cmdio.LogString(ctx, "The following UC schemas will be deleted or recreated. Any underlying data may be lost:") - for _, action := range actions { - cmdio.Log(ctx, action) + // One or more UC schema resources will be deleted or recreated. + if len(schemaActions) != 0 { + cmdio.LogString(ctx, "The following UC schemas will be deleted or recreated. Any underlying data may be lost:") + for _, action := range schemaActions { + cmdio.Log(ctx, action) + } + } + + // One or more DLT pipelines are being recreated. + if len(dltActions) != 0 { + msg := ` +This action will result in the deletion or recreation of the following DLT Pipelines along with the +Streaming Tables (STs) and Materialized Views (MVs) managed by them. Recreating the Pipelines will +restore the defined STs and MVs through full refresh. Note that recreation is necessary when pipeline +properties such as the 'catalog' or 'storage' are changed:` + cmdio.LogString(ctx, msg) + for _, action := range dltActions { + cmdio.Log(ctx, action) + } } if b.AutoApprove { @@ -126,7 +166,7 @@ func Deploy() bundle.Mutator { terraform.CheckRunningResource(), terraform.Plan(terraform.PlanGoal("deploy")), bundle.If( - approvalForUcSchemaDelete, + approvalForDeploy, deployCore, bundle.LogString("Deployment cancelled!"), ), diff --git a/bundle/phases/deploy_test.go b/bundle/phases/deploy_test.go new file mode 100644 index 0000000000..e00370b380 --- /dev/null +++ b/bundle/phases/deploy_test.go @@ -0,0 +1,67 @@ +package phases + +import ( + "testing" + + terraformlib "github.com/databricks/cli/libs/terraform" + tfjson "github.com/hashicorp/terraform-json" + "github.com/stretchr/testify/assert" +) + +func TestParseTerraformActions(t *testing.T) { + changes := []*tfjson.ResourceChange{ + { + Type: "databricks_pipeline", + Change: &tfjson.Change{ + Actions: tfjson.Actions{tfjson.ActionCreate}, + }, + Name: "create pipeline", + }, + { + Type: "databricks_pipeline", + Change: &tfjson.Change{ + Actions: tfjson.Actions{tfjson.ActionDelete}, + }, + Name: "delete pipeline", + }, + { + Type: "databricks_pipeline", + Change: &tfjson.Change{ + Actions: tfjson.Actions{tfjson.ActionDelete, tfjson.ActionCreate}, + }, + Name: "recreate pipeline", + }, + { + Type: "databricks_whatever", + Change: &tfjson.Change{ + Actions: tfjson.Actions{tfjson.ActionDelete, tfjson.ActionCreate}, + }, + Name: "recreate whatever", + }, + } + + res := parseTerraformActions(changes, func(typ string, actions tfjson.Actions) bool { + if typ != "databricks_pipeline" { + return false + } + + if actions.Delete() || actions.Replace() { + return true + } + + return false + }) + + assert.Equal(t, []terraformlib.Action{ + { + Action: terraformlib.ActionTypeDelete, + ResourceType: "databricks_pipeline", + ResourceName: "delete pipeline", + }, + { + Action: terraformlib.ActionTypeRecreate, + ResourceType: "databricks_pipeline", + ResourceName: "recreate pipeline", + }, + }, res) +} diff --git
a/internal/bundle/bundles/recreate_pipeline/databricks_template_schema.json b/internal/bundle/bundles/recreate_pipeline/databricks_template_schema.json new file mode 100644 index 0000000000..762f4470c2 --- /dev/null +++ b/internal/bundle/bundles/recreate_pipeline/databricks_template_schema.json @@ -0,0 +1,8 @@ +{ + "properties": { + "unique_id": { + "type": "string", + "description": "Unique ID for the schema and pipeline names" + } + } +} diff --git a/internal/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl b/internal/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl new file mode 100644 index 0000000000..10350f13e9 --- /dev/null +++ b/internal/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl @@ -0,0 +1,25 @@ +bundle: + name: "bundle-playground" + +variables: + catalog: + description: The catalog the DLT pipeline should use. + default: main + + +resources: + pipelines: + foo: + name: test-pipeline-{{.unique_id}} + libraries: + - notebook: + path: ./nb.sql + development: true + catalog: ${var.catalog} + +include: + - "*.yml" + +targets: + development: + default: true diff --git a/internal/bundle/bundles/recreate_pipeline/template/nb.sql b/internal/bundle/bundles/recreate_pipeline/template/nb.sql new file mode 100644 index 0000000000..199ff50788 --- /dev/null +++ b/internal/bundle/bundles/recreate_pipeline/template/nb.sql @@ -0,0 +1,2 @@ +-- Databricks notebook source +select 1 diff --git a/internal/bundle/deploy_test.go b/internal/bundle/deploy_test.go index 269b7c80a0..736c880dbc 100644 --- a/internal/bundle/deploy_test.go +++ b/internal/bundle/deploy_test.go @@ -120,8 +120,97 @@ func TestAccBundleDeployUcSchemaFailsWithoutAutoApprove(t *testing.T) { t.Setenv("BUNDLE_ROOT", bundleRoot) t.Setenv("TERM", "dumb") c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "deploy", "--force-lock") - stdout, _, err := c.Run() + stdout, stderr, err := c.Run() + + assert.EqualError(t, err, root.ErrAlreadyPrinted.Error()) + assert.Contains(t, stderr.String(), "The following UC schemas will be deleted or recreated. Any underlying data may be lost:\n delete schema bar") + assert.Contains(t, stdout.String(), "the deployment requires destructive actions, but current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed") +} + +func TestAccBundlePipelineDeleteWithoutAutoApprove(t *testing.T) { + ctx, wt := acc.WorkspaceTest(t) + w := wt.W + + nodeTypeId := internal.GetNodeTypeId(env.Get(ctx, "CLOUD_ENV")) + uniqueId := uuid.New().String() + bundleRoot, err := initTestTemplate(t, ctx, "deploy_then_remove_resources", map[string]any{ + "unique_id": uniqueId, + "node_type_id": nodeTypeId, + "spark_version": defaultSparkVersion, + }) + require.NoError(t, err) + + // deploy pipeline + err = deployBundle(t, ctx, bundleRoot) + require.NoError(t, err) + + // assert pipeline is created + pipelineName := "test-bundle-pipeline-" + uniqueId + pipeline, err := w.Pipelines.GetByName(ctx, pipelineName) + require.NoError(t, err) + assert.Equal(t, pipeline.Name, pipelineName) + + // assert job is created + jobName := "test-bundle-job-" + uniqueId + job, err := w.Jobs.GetBySettingsName(ctx, jobName) + require.NoError(t, err) + assert.Equal(t, job.Settings.Name, jobName) + + // delete resources.yml + err = os.Remove(filepath.Join(bundleRoot, "resources.yml")) + require.NoError(t, err) + + // Redeploy the bundle. Expect it to fail because deleting the pipeline requires --auto-approve. 
+ t.Setenv("BUNDLE_ROOT", bundleRoot) + t.Setenv("TERM", "dumb") + c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "deploy", "--force-lock") + stdout, stderr, err := c.Run() + + assert.EqualError(t, err, root.ErrAlreadyPrinted.Error()) + assert.Contains(t, stderr.String(), `This action will result in the deletion or recreation of the following DLT Pipelines along with the +Streaming Tables (STs) and Materialized Views (MVs) managed by them. Recreating the Pipelines will +restore the defined STs and MVs through full refresh. Note that recreation is necessary when pipeline +properties such as the 'catalog' or 'storage' are changed: + delete pipeline bar`) + assert.Contains(t, stdout.String(), "the deployment requires destructive actions, but current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed") + +} + +func TestAccBundlePipelineRecreateWithoutAutoApprove(t *testing.T) { + ctx, wt := acc.UcWorkspaceTest(t) + w := wt.W + uniqueId := uuid.New().String() + + bundleRoot, err := initTestTemplate(t, ctx, "recreate_pipeline", map[string]any{ + "unique_id": uniqueId, + }) + require.NoError(t, err) + + err = deployBundle(t, ctx, bundleRoot) + require.NoError(t, err) + + t.Cleanup(func() { + destroyBundle(t, ctx, bundleRoot) + }) + + // Assert the pipeline is created + pipelineName := "test-pipeline-" + uniqueId + pipeline, err := w.Pipelines.GetByName(ctx, pipelineName) + require.NoError(t, err) + require.Equal(t, pipelineName, pipeline.Name) + + // Redeploy the bundle, pointing the DLT pipeline to a different UC catalog. + t.Setenv("BUNDLE_ROOT", bundleRoot) + t.Setenv("TERM", "dumb") + c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "deploy", "--force-lock", "--var=\"catalog=whatever\"") + stdout, stderr, err := c.Run() + assert.EqualError(t, err, root.ErrAlreadyPrinted.Error()) + assert.Contains(t, stderr.String(), `This action will result in the deletion or recreation of the following DLT Pipelines along with the +Streaming Tables (STs) and Materialized Views (MVs) managed by them. Recreating the Pipelines will +restore the defined STs and MVs through full refresh. Note that recreation is necessary when pipeline +properties such as the 'catalog' or 'storage' are changed: + recreate pipeline foo`) assert.Contains(t, stdout.String(), "the deployment requires destructive actions, but current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed") } From ca6332a5a4325aff1be848536f45d13bd74d93b3 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 4 Sep 2024 13:24:55 +0200 Subject: [PATCH 19/22] Fixed complex variables are not being correctly merged from include files (#1746) ## Changes Fixes an `Error: no value assigned to required variable .` when the main complex variable definition is defined in one file but the target override is defined in a separate file that is included in the main one.
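For illustration, here is a minimal sketch of the configuration shape that hits this error; it condenses the regression test fixtures added below, so the file names and the `cluster` variable mirror those fixtures:

```
# databricks.yml
variables:
  cluster:
    type: complex

include:
  - ./variables/*.yml

# variables/clusters.yml (included file with the target override)
targets:
  dev:
    variables:
      cluster:
        spark_version: "14.2.x-scala2.11"
        node_type_id: "Standard_DS3_v2"
```

Because the `type: complex` field is not visible when the shorthand override is rewritten (the included file has not been merged into the main configuration yet), the override was previously returned as-is instead of being wrapped in a `default` key, leaving the variable without an assigned value.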
## Tests Added regression test --- bundle/config/root.go | 12 +++++++++++- bundle/tests/complex_variables_test.go | 19 +++++++++++++++++++ .../complex_multiple_files/databricks.yml | 17 +++++++++++++++++ .../variables/clusters.yml | 11 +++++++++++ 4 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 bundle/tests/variables/complex_multiple_files/databricks.yml create mode 100644 bundle/tests/variables/complex_multiple_files/variables/clusters.yml diff --git a/bundle/config/root.go b/bundle/config/root.go index 86dc33921d..281c5c2a37 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -433,10 +433,20 @@ func rewriteShorthands(v dyn.Value) (dyn.Value, error) { }, variable.Locations()), nil case dyn.KindMap, dyn.KindSequence: + lookup, err := dyn.Get(variable, "lookup") + // If lookup is set, we don't want to rewrite the variable and return it as is. + if err == nil && lookup.Kind() != dyn.KindInvalid { + return variable, nil + } + // Check if the original definition of the variable has a type field. + // Type might not be found if the variable is overridden in a separate file + // and the configuration is not merged yet. typeV, err := dyn.GetByPath(v, p.Append(dyn.Key("type"))) if err != nil { - return variable, nil + return dyn.NewValue(map[string]dyn.Value{ + "default": variable, + }, variable.Locations()), nil } if typeV.MustString() == "complex" { diff --git a/bundle/tests/complex_variables_test.go b/bundle/tests/complex_variables_test.go index 1badea6dfb..d46d8d8c10 100644 --- a/bundle/tests/complex_variables_test.go +++ b/bundle/tests/complex_variables_test.go @@ -68,3 +68,22 @@ func TestComplexVariablesOverride(t *testing.T) { require.Equal(t, "", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkConf["spark.random"]) require.Equal(t, "", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.PolicyId) } + +func TestComplexVariablesOverrideWithMultipleFiles(t *testing.T) { + b, diags := loadTargetWithDiags("variables/complex_multiple_files", "dev") + require.Empty(t, diags) + + diags = bundle.Apply(context.Background(), b, bundle.Seq( + mutator.SetVariables(), + mutator.ResolveVariableReferencesInComplexVariables(), + mutator.ResolveVariableReferences( + "variables", + ), + )) + require.NoError(t, diags.Error()) + + require.Equal(t, "14.2.x-scala2.11", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkVersion) + require.Equal(t, "Standard_DS3_v2", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.NodeTypeId) + require.Equal(t, 4, b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.NumWorkers) + require.Equal(t, "false", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkConf["spark.speculation"]) +} diff --git a/bundle/tests/variables/complex_multiple_files/databricks.yml b/bundle/tests/variables/complex_multiple_files/databricks.yml new file mode 100644 index 0000000000..cb5d243957 --- /dev/null +++ b/bundle/tests/variables/complex_multiple_files/databricks.yml @@ -0,0 +1,17 @@ +bundle: + name: complex-variables-multiple-files + +resources: + jobs: + my_job: + job_clusters: + - job_cluster_key: key + new_cluster: ${var.cluster} + +variables: + cluster: + type: complex + description: "A cluster definition" + +include: + - ./variables/*.yml diff --git a/bundle/tests/variables/complex_multiple_files/variables/clusters.yml b/bundle/tests/variables/complex_multiple_files/variables/clusters.yml new file mode 100644 index 0000000000..badd451648 --- /dev/null +++
b/bundle/tests/variables/complex_multiple_files/variables/clusters.yml @@ -0,0 +1,11 @@ +targets: + default: + dev: + variables: + cluster: + spark_version: "14.2.x-scala2.11" + node_type_id: "Standard_DS3_v2" + num_workers: 4 + spark_conf: + spark.speculation: false + spark.databricks.delta.retentionDurationCheck.enabled: false From 72030844c52f9499b10e7158544a80fde56451f3 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 4 Sep 2024 19:16:40 +0200 Subject: [PATCH 20/22] Fixed variable override in target with full variable syntax (#1749) ## Changes This PR makes sure that both of these override syntaxes for variables work correctly ``` targets: dev: variables: cluster1: spark_version: "14.2.x-scala2.11" node_type_id: "Standard_DS3_v2" num_workers: 4 spark_conf: spark.speculation: false spark.databricks.delta.retentionDurationCheck.enabled: false cluster2: default: spark_version: "14.2.x-scala2.11" node_type_id: "Standard_DS3_v2" num_workers: 4 spark_conf: spark.speculation: false spark.databricks.delta.retentionDurationCheck.enabled: false ``` ## Tests Added regression test --------- Co-authored-by: Pieter Noordhuis --- bundle/config/root.go | 43 ++++++++++++++----- bundle/tests/complex_variables_test.go | 11 ++--- .../complex_multiple_files/databricks.yml | 43 +++++++++++++++++-- .../variables/clusters.yml | 10 ++++- 4 files changed, 86 insertions(+), 21 deletions(-) diff --git a/bundle/config/root.go b/bundle/config/root.go index 281c5c2a37..46578769c4 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -406,6 +406,30 @@ func (r *Root) MergeTargetOverrides(name string) error { return r.updateWithDynamicValue(root) } +var variableKeywords = []string{"default", "lookup"} + +// isFullVariableOverrideDef checks if the given value is a full syntax variable override. +// A full syntax variable override is a map with only one of the following +// keys: "default", "lookup". +func isFullVariableOverrideDef(v dyn.Value) bool { + mv, ok := v.AsMap() + if !ok { + return false + } + + if mv.Len() != 1 { + return false + } + + for _, keyword := range variableKeywords { + if _, ok := mv.GetByString(keyword); ok { + return true + } + } + + return false +} + // rewriteShorthands performs lightweight rewriting of the configuration // tree where we allow users to write a shorthand and must rewrite to the full form. func rewriteShorthands(v dyn.Value) (dyn.Value, error) { @@ -433,30 +457,27 @@ func rewriteShorthands(v dyn.Value) (dyn.Value, error) { }, variable.Locations()), nil case dyn.KindMap, dyn.KindSequence: - lookup, err := dyn.Get(variable, "lookup") - // If lookup is set, we don't want to rewrite the variable and return it as is. - if err == nil && lookup.Kind() != dyn.KindInvalid { + // If it's a full variable definition, leave it as is. + if isFullVariableOverrideDef(variable) { return variable, nil } // Check if the original definition of the variable has a type field. + // If it has a type field, it means the shorthand is a value of a complex type. // Type might not be found if the variable is overridden in a separate file // and the configuration is not merged yet.
typeV, err := dyn.GetByPath(v, p.Append(dyn.Key("type"))) - if err != nil { - return dyn.NewValue(map[string]dyn.Value{ - "default": variable, - }, variable.Locations()), nil - } - - if typeV.MustString() == "complex" { + if err == nil && typeV.MustString() == "complex" { return dyn.NewValue(map[string]dyn.Value{ "type": typeV, "default": variable, }, variable.Locations()), nil } - return variable, nil + // If it's a shorthand, rewrite it to a full variable definition. + return dyn.NewValue(map[string]dyn.Value{ + "default": variable, + }, variable.Locations()), nil default: return variable, nil diff --git a/bundle/tests/complex_variables_test.go b/bundle/tests/complex_variables_test.go index d46d8d8c10..6371071ce4 100644 --- a/bundle/tests/complex_variables_test.go +++ b/bundle/tests/complex_variables_test.go @@ -81,9 +81,10 @@ func TestComplexVariablesOverrideWithMultipleFiles(t *testing.T) { ), )) require.NoError(t, diags.Error()) - - require.Equal(t, "14.2.x-scala2.11", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkVersion) - require.Equal(t, "Standard_DS3_v2", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.NodeTypeId) - require.Equal(t, 4, b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.NumWorkers) - require.Equal(t, "false", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkConf["spark.speculation"]) + for _, cluster := range b.Config.Resources.Jobs["my_job"].JobClusters { + require.Equalf(t, "14.2.x-scala2.11", cluster.NewCluster.SparkVersion, "cluster: %v", cluster.JobClusterKey) + require.Equalf(t, "Standard_DS3_v2", cluster.NewCluster.NodeTypeId, "cluster: %v", cluster.JobClusterKey) + require.Equalf(t, 4, cluster.NewCluster.NumWorkers, "cluster: %v", cluster.JobClusterKey) + require.Equalf(t, "false", cluster.NewCluster.SparkConf["spark.speculation"], "cluster: %v", cluster.JobClusterKey) + } } diff --git a/bundle/tests/variables/complex_multiple_files/databricks.yml b/bundle/tests/variables/complex_multiple_files/databricks.yml index cb5d243957..42a82c6157 100644 --- a/bundle/tests/variables/complex_multiple_files/databricks.yml +++ b/bundle/tests/variables/complex_multiple_files/databricks.yml @@ -5,13 +5,48 @@ resources: jobs: my_job: job_clusters: - - job_cluster_key: key - new_cluster: ${var.cluster} - + - job_cluster_key: key1 + new_cluster: ${var.cluster1} + - job_cluster_key: key2 + new_cluster: ${var.cluster2} + - job_cluster_key: key3 + new_cluster: ${var.cluster3} + - job_cluster_key: key4 + new_cluster: ${var.cluster4} variables: - cluster: + cluster1: + type: complex + description: "A cluster definition" + cluster2: + type: complex + description: "A cluster definition" + cluster3: + type: complex + description: "A cluster definition" + cluster4: type: complex description: "A cluster definition" include: - ./variables/*.yml + + +targets: + default: + dev: + variables: + cluster3: + spark_version: "14.2.x-scala2.11" + node_type_id: "Standard_DS3_v2" + num_workers: 4 + spark_conf: + spark.speculation: false + spark.databricks.delta.retentionDurationCheck.enabled: false + cluster4: + default: + spark_version: "14.2.x-scala2.11" + node_type_id: "Standard_DS3_v2" + num_workers: 4 + spark_conf: + spark.speculation: false + spark.databricks.delta.retentionDurationCheck.enabled: false diff --git a/bundle/tests/variables/complex_multiple_files/variables/clusters.yml b/bundle/tests/variables/complex_multiple_files/variables/clusters.yml index badd451648..0186c437b7 100644 --- 
a/bundle/tests/variables/complex_multiple_files/variables/clusters.yml +++ b/bundle/tests/variables/complex_multiple_files/variables/clusters.yml @@ -2,10 +2,18 @@ targets: default: dev: variables: - cluster: + cluster1: spark_version: "14.2.x-scala2.11" node_type_id: "Standard_DS3_v2" num_workers: 4 spark_conf: spark.speculation: false spark.databricks.delta.retentionDurationCheck.enabled: false + cluster2: + default: + spark_version: "14.2.x-scala2.11" + node_type_id: "Standard_DS3_v2" + num_workers: 4 + spark_conf: + spark.speculation: false + spark.databricks.delta.retentionDurationCheck.enabled: false From f71d9e76499f9296daad620f6efed651b9864679 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 5 Sep 2024 10:56:52 +0200 Subject: [PATCH 21/22] [Release] Release v0.228.0 (#1752) CLI: * Do not error if we cannot prompt for a profile in `auth login` ([#1745](https://github.com/databricks/cli/pull/1745)). Bundles: As of this release, the CLI will show a prompt if there are configuration changes that will lead to a DLT pipeline recreation. Users can skip the prompt by specifying the `--auto-approve` flag. * Pass along $AZURE_CONFIG_FILE to Terraform process ([#1734](https://github.com/databricks/cli/pull/1734)). * Add prompt when a pipeline recreation happens ([#1672](https://github.com/databricks/cli/pull/1672)). * Use materialized views in the default-sql template ([#1709](https://github.com/databricks/cli/pull/1709)). * Update templates to latest LTS DBR ([#1715](https://github.com/databricks/cli/pull/1715)). * Make lock optional in the JSON schema ([#1738](https://github.com/databricks/cli/pull/1738)). * Do not suppress normalisation diagnostics for resolving variables ([#1740](https://github.com/databricks/cli/pull/1740)). * Include a permissions section in all templates ([#1713](https://github.com/databricks/cli/pull/1713)). * Fixed complex variables are not being correctly merged from include files ([#1746](https://github.com/databricks/cli/pull/1746)). * Fixed variable override in target with full variable syntax ([#1749](https://github.com/databricks/cli/pull/1749)). Internal: * Consider serverless clusters as compatible for Python wheel tasks ([#1733](https://github.com/databricks/cli/pull/1733)). * PythonMutator: explain missing package error ([#1736](https://github.com/databricks/cli/pull/1736)). * Add `dyn.Time` to box a timestamp with its original string value ([#1732](https://github.com/databricks/cli/pull/1732)). * Fix streaming of stdout, stdin, stderr in cobra test runner ([#1742](https://github.com/databricks/cli/pull/1742)). Dependency updates: * Bump github.com/Masterminds/semver/v3 from 3.2.1 to 3.3.0 ([#1741](https://github.com/databricks/cli/pull/1741)). --------- Co-authored-by: Pieter Noordhuis --- CHANGELOG.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fac7d597d1..d638312535 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,34 @@ # Version changelog +## [Release] Release v0.228.0 + +CLI: + * Do not error if we cannot prompt for a profile in `auth login` ([#1745](https://github.com/databricks/cli/pull/1745)). + +Bundles: + +As of this release, the CLI will show a prompt if there are configuration changes that lead to DLT pipeline recreation. +Users can skip the prompt by specifying the `--auto-approve` flag. + + * Pass along $AZURE_CONFIG_FILE to Terraform process ([#1734](https://github.com/databricks/cli/pull/1734)).
+ * Add prompt when a pipeline recreation happens ([#1672](https://github.com/databricks/cli/pull/1672)). + * Use materialized views in the default-sql template ([#1709](https://github.com/databricks/cli/pull/1709)). + * Update templates to latest LTS DBR ([#1715](https://github.com/databricks/cli/pull/1715)). + * Make lock optional in the JSON schema ([#1738](https://github.com/databricks/cli/pull/1738)). + * Do not suppress normalisation diagnostics for resolving variables ([#1740](https://github.com/databricks/cli/pull/1740)). + * Include a permissions section in all templates ([#1713](https://github.com/databricks/cli/pull/1713)). + * Fixed complex variables are not being correctly merged from include files ([#1746](https://github.com/databricks/cli/pull/1746)). + * Fixed variable override in target with full variable syntax ([#1749](https://github.com/databricks/cli/pull/1749)). + +Internal: + * Consider serverless clusters as compatible for Python wheel tasks ([#1733](https://github.com/databricks/cli/pull/1733)). + * PythonMutator: explain missing package error ([#1736](https://github.com/databricks/cli/pull/1736)). + * Add `dyn.Time` to box a timestamp with its original string value ([#1732](https://github.com/databricks/cli/pull/1732)). + * Fix streaming of stdout, stdin, stderr in cobra test runner ([#1742](https://github.com/databricks/cli/pull/1742)). + +Dependency updates: + * Bump github.com/Masterminds/semver/v3 from 3.2.1 to 3.3.0 ([#1741](https://github.com/databricks/cli/pull/1741)). + ## [Release] Release v0.227.1 CLI: From ceefa80d7278eeef9cce500b2c69453da2040f40 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 5 Sep 2024 13:05:16 +0200 Subject: [PATCH 22/22] Pass copy of `dyn.Path` to callback function (#1747) ## Changes Some call sites hold on to the `dyn.Path` provided to them by the callback. It must therefore never be mutated after the callback returns, or these mutations leak out into unknown scope. This change means it is no longer possible for this failure mode to happen. ## Tests Unit test. --- bundle/artifacts/expand_globs.go | 7 +--- .../config/validate/unique_resource_keys.go | 6 +--- bundle/libraries/expand_glob_references.go | 8 ++--- bundle/libraries/upload.go | 2 +- libs/dyn/visit.go | 2 +- libs/dyn/visit_map.go | 4 +-- libs/dyn/visit_test.go | 36 +++++++++++++++++++ 7 files changed, 45 insertions(+), 20 deletions(-) create mode 100644 libs/dyn/visit_test.go diff --git a/bundle/artifacts/expand_globs.go b/bundle/artifacts/expand_globs.go index 617444054d..cdf3d45900 100644 --- a/bundle/artifacts/expand_globs.go +++ b/bundle/artifacts/expand_globs.go @@ -33,12 +33,7 @@ func createGlobError(v dyn.Value, p dyn.Path, message string) diag.Diagnostic { Severity: diag.Error, Summary: fmt.Sprintf("%s: %s", source, message), Locations: []dyn.Location{v.Location()}, - - Paths: []dyn.Path{ - // Hack to clone the path. This path copy is mutable. - // To be addressed in a later PR. - p.Append(), - }, + Paths: []dyn.Path{p}, } } diff --git a/bundle/config/validate/unique_resource_keys.go b/bundle/config/validate/unique_resource_keys.go index d6212b0acf..50295375b6 100644 --- a/bundle/config/validate/unique_resource_keys.go +++ b/bundle/config/validate/unique_resource_keys.go @@ -3,7 +3,6 @@ package validate import ( "context" "fmt" - "slices" "sort" "github.com/databricks/cli/bundle" @@ -66,10 +65,7 @@ func (m *uniqueResourceKeys) Apply(ctx context.Context, b *bundle.Bundle) diag.D } } - // dyn.Path under the hood is a slice. 
The code that walks the configuration - // tree uses the same underlying slice to track the path as it walks - // the tree. So, we need to clone it here. - m.paths = append(m.paths, slices.Clone(p)) + m.paths = append(m.paths, p) m.locations = append(m.locations, v.Locations()...) resourceMetadata[k] = m diff --git a/bundle/libraries/expand_glob_references.go b/bundle/libraries/expand_glob_references.go index 9e90a2a17f..9322a06b83 100644 --- a/bundle/libraries/expand_glob_references.go +++ b/bundle/libraries/expand_glob_references.go @@ -16,12 +16,10 @@ type expand struct { func matchError(p dyn.Path, l []dyn.Location, message string) diag.Diagnostic { return diag.Diagnostic{ - Severity: diag.Error, - Summary: message, - Paths: []dyn.Path{ - p.Append(), - }, + Severity: diag.Error, + Summary: message, Locations: l, + Paths: []dyn.Path{p}, } } diff --git a/bundle/libraries/upload.go b/bundle/libraries/upload.go index be7cc41db5..224e7ab2d9 100644 --- a/bundle/libraries/upload.go +++ b/bundle/libraries/upload.go @@ -76,7 +76,7 @@ func collectLocalLibraries(b *bundle.Bundle) (map[string][]configLocation, error source = filepath.Join(b.RootPath, source) libs[source] = append(libs[source], configLocation{ - configPath: p.Append(), // Hack to get the copy of path + configPath: p, location: v.Location(), }) diff --git a/libs/dyn/visit.go b/libs/dyn/visit.go index 4d3cf50142..38adec24ff 100644 --- a/libs/dyn/visit.go +++ b/libs/dyn/visit.go @@ -70,7 +70,7 @@ type visitOptions struct { func visit(v Value, prefix Path, suffix Pattern, opts visitOptions) (Value, error) { if len(suffix) == 0 { - return opts.fn(prefix, v) + return opts.fn(slices.Clone(prefix), v) } // Initialize prefix if it is empty. diff --git a/libs/dyn/visit_map.go b/libs/dyn/visit_map.go index cd2cd4831e..3f0cded03b 100644 --- a/libs/dyn/visit_map.go +++ b/libs/dyn/visit_map.go @@ -21,7 +21,7 @@ func Foreach(fn MapFunc) MapFunc { for _, pair := range m.Pairs() { pk := pair.Key pv := pair.Value - nv, err := fn(append(p, Key(pk.MustString())), pv) + nv, err := fn(p.Append(Key(pk.MustString())), pv) if err != nil { return InvalidValue, err } @@ -32,7 +32,7 @@ func Foreach(fn MapFunc) MapFunc { s := slices.Clone(v.MustSequence()) for i, value := range s { var err error - s[i], err = fn(append(p, Index(i)), value) + s[i], err = fn(p.Append(Index(i)), value) if err != nil { return InvalidValue, err } diff --git a/libs/dyn/visit_test.go b/libs/dyn/visit_test.go new file mode 100644 index 0000000000..5b61399be0 --- /dev/null +++ b/libs/dyn/visit_test.go @@ -0,0 +1,36 @@ +package dyn_test + +import ( + "testing" + + "github.com/databricks/cli/libs/dyn" + assert "github.com/databricks/cli/libs/dyn/dynassert" +) + +func TestVisitCallbackPathCopy(t *testing.T) { + vin := dyn.V(map[string]dyn.Value{ + "foo": dyn.V(42), + "bar": dyn.V(43), + }) + + var paths []dyn.Path + + // The callback should receive a copy of the path. + // If the same underlying value is used, all collected paths will be the same. + // This test uses `MapByPattern` to collect all paths in the map. + // Visit itself doesn't have public functions and we exclusively use black-box testing for this package. + _, _ = dyn.MapByPattern(vin, dyn.NewPattern(dyn.AnyKey()), func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + paths = append(paths, p) + return v, nil + }) + + // Verify that the paths retained their original values. 
+ var strings []string + for _, p := range paths { + strings = append(strings, p.String()) + } + assert.ElementsMatch(t, strings, []string{ + "foo", + "bar", + }) +}