Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add allow list for resources when bundle run_as is set #1233

Merged
merged 28 commits into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
9adc6e6
wip
shreyas-goenka Feb 5, 2024
39c1f08
Merge remote-tracking branch 'origin' into fix-run-as
shreyas-goenka Feb 22, 2024
76fc145
Merge remote-tracking branch 'origin' into fix-run-as
shreyas-goenka Feb 23, 2024
b2450c1
Merge remote-tracking branch 'origin' into fix-run-as
shreyas-goenka Feb 28, 2024
4da90f2
wip
shreyas-goenka Mar 4, 2024
05c9d38
further along the way
shreyas-goenka Mar 7, 2024
66f288d
Merge remote-tracking branch 'origin' into fix-run-as
shreyas-goenka Mar 7, 2024
335d0c9
add test for when both sp and user are defined
shreyas-goenka Mar 7, 2024
776af33
fix test
shreyas-goenka Mar 7, 2024
20a0274
final cleanup
shreyas-goenka Mar 7, 2024
5886a65
fix test
shreyas-goenka Mar 7, 2024
f2d24fc
fix windows tests
shreyas-goenka Mar 7, 2024
61f08cc
Merge remote-tracking branch 'origin' into fix-run-as
shreyas-goenka Mar 11, 2024
a4cc92d
rename vars
shreyas-goenka Mar 11, 2024
e7416ef
use typed configuration instead
shreyas-goenka Mar 13, 2024
aafd767
fix tests
shreyas-goenka Mar 13, 2024
9d6949a
cleanup
shreyas-goenka Mar 13, 2024
fab62f3
add error for when neither are specified
shreyas-goenka Mar 13, 2024
89c86c8
lint
shreyas-goenka Mar 13, 2024
dcdb87f
-
shreyas-goenka Mar 13, 2024
9c2bfc6
comment refine
shreyas-goenka Mar 25, 2024
a83453b
Merge remote-tracking branch 'origin' into fix-run-as
shreyas-goenka Mar 25, 2024
e1e5a36
merge diagnostics changes
shreyas-goenka Mar 25, 2024
98f8be2
more diagnostics
shreyas-goenka Mar 25, 2024
d28ce06
Merge remote-tracking branch 'origin' into fix-run-as
shreyas-goenka Mar 27, 2024
9e9e18a
TryLocation -> GetLocation
shreyas-goenka Mar 27, 2024
dcaf247
address comments about comments
shreyas-goenka Mar 27, 2024
a50117e
remove branching for windows assertions
shreyas-goenka Mar 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 91 additions & 26 deletions bundle/config/mutator/run_as.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,24 @@ package mutator

import (
"context"
"slices"
"fmt"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/databricks-sdk-go/service/jobs"
)

type setRunAs struct {
}

// SetRunAs mutator is used to go over defined resources such as Jobs and DLT Pipelines
// And set correct execution identity ("run_as" for a job or "is_owner" permission for DLT)
// if top-level "run-as" section is defined in the configuration.
// This mutator does two things:
//
// 1. Sets the run_as field for jobs to the value of the run_as field in the bundle.
//
// 2. Validates that the bundle run_as configuration is valid in the context of the bundle.
// If the run_as user is different from the current deployment user, DABs only
// supports a subset of resources.
func SetRunAs() bundle.Mutator {
return &setRunAs{}
}
Expand All @@ -24,12 +28,94 @@ func (m *setRunAs) Name() string {
return "SetRunAs"
}

type errUnsupportedResourceTypeForRunAs struct {
resourceType string
resourceLocation dyn.Location
currentUser string
runAsUser string
}

// TODO(6 March 2024): Link the docs page describing run_as semantics in the error below
// once the page is ready.
shreyas-goenka marked this conversation as resolved.
Show resolved Hide resolved
func (e errUnsupportedResourceTypeForRunAs) Error() string {
return fmt.Sprintf("%s are not supported when the current deployment user is different from the bundle's run_as identity. Please deploy as the run_as identity. Location of the unsupported resource: %s. Current identity: %s. Run as identity: %s", e.resourceType, e.resourceLocation, e.currentUser, e.runAsUser)
}
shreyas-goenka marked this conversation as resolved.
Show resolved Hide resolved

type errBothSpAndUserSpecified struct {
spName string
spLoc dyn.Location
userName string
userLoc dyn.Location
}

func (e errBothSpAndUserSpecified) Error() string {
return fmt.Sprintf("run_as section must specify exactly one identity. A service_principal_name %q is specified at %s. A user_name %q is defined at %s", e.spName, e.spLoc, e.userName, e.userLoc)
}

func validateRunAs(b *bundle.Bundle) error {
runAs := b.Config.RunAs

// Error if neither service_principal_name nor user_name are specified
if runAs.ServicePrincipalName == "" && runAs.UserName == "" {
return fmt.Errorf("run_as section must specify exactly one identity. Neither service_principal_name nor user_name is specified at %s", b.Config.GetLocation("run_as"))
}

// Error if both service_principal_name and user_name are specified
if runAs.UserName != "" && runAs.ServicePrincipalName != "" {
return errBothSpAndUserSpecified{
spName: runAs.ServicePrincipalName,
userName: runAs.UserName,
spLoc: b.Config.GetLocation("run_as.service_principal_name"),
userLoc: b.Config.GetLocation("run_as.user_name"),
}
}

identity := runAs.ServicePrincipalName
if identity == "" {
identity = runAs.UserName
}

// All resources are supported if the run_as identity is the same as the current deployment identity.
if identity == b.Config.Workspace.CurrentUser.UserName {
return nil
}

// DLT pipelines do not support run_as in the API.
if len(b.Config.Resources.Pipelines) > 0 {
return errUnsupportedResourceTypeForRunAs{
resourceType: "pipelines",
resourceLocation: b.Config.GetLocation("resources.pipelines"),
currentUser: b.Config.Workspace.CurrentUser.UserName,
runAsUser: identity,
}
}

// Model serving endpoints do not support run_as in the API.
if len(b.Config.Resources.ModelServingEndpoints) > 0 {
return errUnsupportedResourceTypeForRunAs{
resourceType: "model_serving_endpoints",
resourceLocation: b.Config.GetLocation("resources.model_serving_endpoints"),
currentUser: b.Config.Workspace.CurrentUser.UserName,
runAsUser: identity,
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use an allow list instead?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not unless we use reflect / dynamic configuration. The allow list semantics are being captured in the unit test though.


return nil
}

func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
// Mutator is a no-op if run_as is not specified in the bundle
runAs := b.Config.RunAs
if runAs == nil {
return nil
}

// Assert the run_as configuration is valid in the context of the bundle
if err := validateRunAs(b); err != nil {
return diag.FromErr(err)
}

// Set run_as for jobs
for i := range b.Config.Resources.Jobs {
job := b.Config.Resources.Jobs[i]
if job.RunAs != nil {
Expand All @@ -41,26 +127,5 @@ func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
}
}

me := b.Config.Workspace.CurrentUser.UserName
// If user deploying the bundle and the one defined in run_as are the same
// Do not add IS_OWNER permission. Current user is implied to be an owner in this case.
// Otherwise, it will fail due to this bug https://github.com/databricks/terraform-provider-databricks/issues/2407
if runAs.UserName == me || runAs.ServicePrincipalName == me {
return nil
}

for i := range b.Config.Resources.Pipelines {
pipeline := b.Config.Resources.Pipelines[i]
pipeline.Permissions = slices.DeleteFunc(pipeline.Permissions, func(p resources.Permission) bool {
return (runAs.ServicePrincipalName != "" && p.ServicePrincipalName == runAs.ServicePrincipalName) ||
(runAs.UserName != "" && p.UserName == runAs.UserName)
})
pipeline.Permissions = append(pipeline.Permissions, resources.Permission{
Level: "IS_OWNER",
ServicePrincipalName: runAs.ServicePrincipalName,
UserName: runAs.UserName,
})
}

return nil
}
188 changes: 188 additions & 0 deletions bundle/config/mutator/run_as_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
package mutator

import (
"context"
"slices"
"testing"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/cli/libs/dyn/convert"
"github.com/databricks/databricks-sdk-go/service/iam"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func allResourceTypes(t *testing.T) []string {
// Compute supported resource types based on the `Resources{}` struct.
r := config.Resources{}
rv, err := convert.FromTyped(r, dyn.NilValue)
require.NoError(t, err)
normalized, _ := convert.Normalize(r, rv, convert.IncludeMissingFields)
resourceTypes := []string{}
for _, k := range normalized.MustMap().Keys() {
resourceTypes = append(resourceTypes, k.MustString())
}
slices.Sort(resourceTypes)

// Assert the total list of resource supported, as a sanity check that using
// the dyn library gives us the correct list of all resources supported. Please
// also update this check when adding a new resource
require.Equal(t, []string{
"experiments",
"jobs",
"model_serving_endpoints",
"models",
"pipelines",
"registered_models",
},
resourceTypes,
)

return resourceTypes
}

func TestRunAsWorksForAllowedResources(t *testing.T) {
config := config.Root{
Workspace: config.Workspace{
CurrentUser: &config.User{
User: &iam.User{
UserName: "alice",
},
},
},
RunAs: &jobs.JobRunAs{
UserName: "bob",
},
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job_one": {
JobSettings: &jobs.JobSettings{
Name: "foo",
},
},
"job_two": {
JobSettings: &jobs.JobSettings{
Name: "bar",
},
},
"job_three": {
JobSettings: &jobs.JobSettings{
Name: "baz",
},
},
},
Models: map[string]*resources.MlflowModel{
"model_one": {},
},
RegisteredModels: map[string]*resources.RegisteredModel{
"registered_model_one": {},
},
Experiments: map[string]*resources.MlflowExperiment{
"experiment_one": {},
},
},
}

b := &bundle.Bundle{
Config: config,
}

diags := bundle.Apply(context.Background(), b, SetRunAs())
assert.NoError(t, diags.Error())

for _, job := range b.Config.Resources.Jobs {
assert.Equal(t, "bob", job.RunAs.UserName)
}
}

func TestRunAsErrorForUnsupportedResources(t *testing.T) {
// Bundle "run_as" has two modes of operation, each with a different set of
// resources that are supported.
// Cases:
// 1. When the bundle "run_as" identity is same as the current deployment
// identity. In this case all resources are supported.
// 2. When the bundle "run_as" identity is different from the current
// deployment identity. In this case only a subset of resources are
// supported. This subset of resources are defined in the allow list below.
//
// To be a part of the allow list, the resource must satisfy one of the following
// two conditions:
// 1. The resource supports setting a run_as identity to a different user
// from the owner/creator of the resource. For example, jobs.
// 2. Run as semantics do not apply to the resource. We do not plan to add
// platform side support for `run_as` for these resources. For example,
// experiments or registered models.
//
// Any resource that is not on the allow list cannot be used when the bundle
// run_as is different from the current deployment user. "bundle validate" must
// return an error if such a resource has been defined, and the run_as identity
// is different from the current deployment identity.
//
// Action Item: If you are adding a new resource to DABs, please check in with
// the relevant owning team whether the resource should be on the allow list or (implicitly) on
// the deny list. Any resources that could have run_as semantics in the future
// should be on the deny list.
// For example: Teams for pipelines, model serving endpoints or Lakeview dashboards
// are planning to add platform side support for `run_as` for these resources at
// some point in the future. These resources are (implicitly) on the deny list, since
// they are not on the allow list below.
allowList := []string{
"jobs",
"models",
"registered_models",
"experiments",
}

base := config.Root{
Workspace: config.Workspace{
CurrentUser: &config.User{
User: &iam.User{
UserName: "alice",
},
},
},
RunAs: &jobs.JobRunAs{
UserName: "bob",
},
}

v, err := convert.FromTyped(base, dyn.NilValue)
require.NoError(t, err)

for _, rt := range allResourceTypes(t) {
// Skip allowed resources
if slices.Contains(allowList, rt) {
continue
}

// Add an instance of the resource type that is not on the allow list to
// the bundle configuration.
nv, err := dyn.SetByPath(v, dyn.NewPath(dyn.Key("resources"), dyn.Key(rt)), dyn.V(map[string]dyn.Value{
"foo": dyn.V(map[string]dyn.Value{
"path": dyn.V("bar"),
}),
}))
require.NoError(t, err)

// Get back typed configuration from the newly created invalid bundle configuration.
r := &config.Root{}
err = convert.ToTyped(r, nv)
require.NoError(t, err)

// Assert this invalid bundle configuration fails validation.
b := &bundle.Bundle{
Config: *r,
}
diags := bundle.Apply(context.Background(), b, SetRunAs())
assert.Equal(t, diags.Error().Error(), errUnsupportedResourceTypeForRunAs{
resourceType: rt,
resourceLocation: dyn.Location{},
currentUser: "alice",
runAsUser: "bob",
}.Error(), "expected run_as with a different identity than the current deployment user to not supported for resources of type: %s", rt)
}
}
11 changes: 11 additions & 0 deletions bundle/config/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -448,3 +448,14 @@ func validateVariableOverrides(root, target dyn.Value) (err error) {

return nil
}

// Best effort to get the location of configuration value at the specified path.
// This function is useful to annotate error messages with the location, because
// we don't want to fail with a different error message if we cannot retrieve the location.
func (r *Root) GetLocation(path string) dyn.Location {
v, err := dyn.Get(r.value, path)
if err != nil {
return dyn.Location{}
}
return v.Location()
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,6 @@ targets:
user_name: "my_user_name"

resources:
pipelines:
nyc_taxi_pipeline:
name: "nyc taxi loader"

permissions:
- level: CAN_VIEW
service_principal_name: my_service_principal
- level: CAN_VIEW
user_name: my_user_name

libraries:
- notebook:
path: ./dlt/nyc_taxi_loader

jobs:
job_one:
name: Job One
Expand Down Expand Up @@ -52,3 +38,15 @@ resources:
- task_key: "task_three"
notebook_task:
notebook_path: "./test.py"

models:
model_one:
name: "skynet"

registered_models:
model_two:
name: "skynet (in UC)"

experiments:
experiment_one:
name: "experiment_one"
Loading
Loading