From 34e509b1fbd06408c9b7bdeca93a39736e50dd08 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Tue, 17 Oct 2023 12:54:33 +0200 Subject: [PATCH] Request management token via Azure CLI only for Service Principals and not human users This PR removes the call to `az account get-access-token` to retrieve management token in case the machine is authenticated with a non-human user. --- databricks/sdk/core.py | 44 ++++++++++++++++++++++++++++++----------- databricks/sdk/oauth.py | 37 ++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 12 deletions(-) diff --git a/databricks/sdk/core.py b/databricks/sdk/core.py index 482d60b31..71a7bc386 100644 --- a/databricks/sdk/core.py +++ b/databricks/sdk/core.py @@ -314,23 +314,42 @@ def __init__(self, resource: str, subscription: str = ""): access_token_field='accessToken', expiry_field='expiresOn') + def is_human_user(self) -> bool: + """The UPN claim is the username of the user, but not the Service Principal. + + Azure CLI can be authenticated by both human users (`az login`) and service principals. In case of service + principals, it can be either OIDC from GitHub or login with a password: + + ~ $ az login --service-principal --user $clientID --password $clientSecret --tenant $tenantID + + Human users get more claims: + - 'amr' - how the subject of the token was authenticated + - 'name', 'family_name', 'given_name' - human-readable values that identifies the subject of the token + - 'scp' with `user_impersonation` value, that shows the set of scopes exposed by your application for which + the client application has requested (and received) consent + - 'unique_name' - a human-readable value that identifies the subject of the token. This value is not + guaranteed to be unique within a tenant and should be used only for display purposes. + - 'upn' - The username of the user. + """ + return 'upn' in self.token().jwt_claims() + @staticmethod def for_resource(cfg: 'Config', resource: str) -> 'AzureCliTokenSource': subscription = AzureCliTokenSource.get_subscription(cfg) if subscription != "": - token = AzureCliTokenSource(resource, subscription) + token_source = AzureCliTokenSource(resource, subscription) try: # This will fail if the user has access to the workspace, but not to the subscription # itself. # In such case, we fall back to not using the subscription. - token.token() - return token + token_source.token() + return token_source except OSError: logger.warning("Failed to get token for subscription. Using resource only token.") - token = AzureCliTokenSource(resource) - token.token() - return token + token_source = AzureCliTokenSource(resource) + token_source.token() + return token_source @staticmethod def get_subscription(cfg: 'Config') -> str: @@ -355,12 +374,13 @@ def azure_cli(cfg: 'Config') -> Optional[HeaderFactory]: doc = 'https://docs.microsoft.com/en-us/cli/azure/?view=azure-cli-latest' logger.debug(f'Most likely Azure CLI is not installed. See {doc} for details') return None - try: - mgmt_token_source = AzureCliTokenSource.for_resource(cfg, - cfg.arm_environment.service_management_endpoint) - except Exception as e: - logger.debug(f'Not including service management token in headers', exc_info=e) - mgmt_token_source = None + if not token_source.is_human_user(): + try: + management_endpoint = cfg.arm_environment.service_management_endpoint + mgmt_token_source = AzureCliTokenSource.for_resource(cfg, management_endpoint) + except Exception as e: + logger.debug(f'Not including service management token in headers', exc_info=e) + mgmt_token_source = None _ensure_host_present(cfg, lambda resource: AzureCliTokenSource.for_resource(cfg, resource)) logger.info("Using Azure CLI authentication with AAD tokens") diff --git a/databricks/sdk/oauth.py b/databricks/sdk/oauth.py index 263067c8f..9a3061e1d 100644 --- a/databricks/sdk/oauth.py +++ b/databricks/sdk/oauth.py @@ -83,6 +83,43 @@ def from_dict(raw: dict) -> 'Token': expiry=datetime.fromisoformat(raw['expiry']), refresh_token=raw.get('refresh_token')) + def jwt_claims(self) -> Dict[str, str]: + """Get claims from the access token or return an empty dictionary if it is not a JWT token. + + All refreshable tokens we're dealing with are JSON Web Tokens (JWT). + + The common claims are: + - 'aud' represents the intended recipient of the token. In case of Azure, this is an app's Application ID + assigned within the Azure portal. + - 'iss' serves to identify the security token service (STS) responsible for creating and delivering the token. + In case of Azure, it includes the Azure AD tenant where user authentication occurred. + - 'appid' stands for the application ID of the client utilizing this token. This application can operate either + autonomously or on behalf of a user. The application ID commonly represents an application object but + may also denote a service principal object in case of Azure. + - 'idp' is used to document the identity provider that authenticated the subject of the token. + - 'oid' is the unchanging identifier for an entity within the identity system. + - 'sub' identifies the primary entity for the token, such as the user of an app. This value is specific to + a particular application ID. If a single user logs into two different apps using distinct client IDs, + these apps will receive different values for the subject claim. + - 'tid' In case of Azure, this value represents Azure Tenant ID. + + See https://datatracker.ietf.org/doc/html/rfc7519 for specification. + See https://jwt.ms for debugger. + """ + try: + jwt_split = self.access_token.split(".") + if len(jwt_split) != 3: + logger.debug(f'Tried to decode access token as JWT, but failed: {len(jwt_split)} components') + return {} + payload_with_padding = jwt_split[1] + "==" + payload_bytes = base64.standard_b64decode(payload_with_padding) + payload_json = payload_bytes.decode("utf8") + claims = json.loads(payload_json) + return claims + except ValueError as err: + logger.debug(f'Tried to decode access token as JWT, but failed: {err}') + return {} + class TokenSource: