From 5e9188ca2cd1f23952a5855276ffcc47dbb5395d Mon Sep 17 00:00:00 2001
From: Tamas Nemeth
Date: Tue, 13 Aug 2024 16:57:31 +0200
Subject: [PATCH] fix(ingest/databricks): Updating code to work with Databricks sdk 0.30 (#11158)

---
 docs-website/sidebars.js                           |  1 +
 metadata-ingestion/setup.py                        |  2 ++
 .../ingestion/source/unity/proxy_profiling.py      | 13 +++++--------
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js
index 75fc1f2dcd0c5..a3aa54657d067 100644
--- a/docs-website/sidebars.js
+++ b/docs-website/sidebars.js
@@ -924,6 +924,7 @@ module.exports = {
     // "docs/_api-guide-template"
    // - "metadata-service/services/README"
     // "metadata-ingestion/examples/structured_properties/README"
+    // "smoke-test/tests/openapi/README"
     // ],
   ],
 };
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index aef22dd145978..abb716d2434ac 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -104,6 +104,8 @@
 
 classification_lib = {
     "acryl-datahub-classify==0.0.11",
+    # schwifty is needed for the classify plugin but in 2024.08.0 they broke the python 3.8 compatibility
+    "schwifty<2024.08.0",
     # This is a bit of a hack. Because we download the SpaCy model at runtime in the classify plugin,
     # we need pip to be available.
     "pip",
diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_profiling.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_profiling.py
index 5d6d2bec6d2fc..51546a79e05c3 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_profiling.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_profiling.py
@@ -1,15 +1,14 @@
 import logging
 import time
-from typing import Optional, Union
+from typing import Optional
 
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.core import DatabricksError
 from databricks.sdk.service._internal import Wait
 from databricks.sdk.service.catalog import TableInfo
 from databricks.sdk.service.sql import (
-    ExecuteStatementResponse,
-    GetStatementResponse,
     GetWarehouseResponse,
+    StatementResponse,
     StatementState,
     StatementStatus,
 )
@@ -125,7 +124,7 @@ def _should_retry_unsupported_column(
 
     def _analyze_table(
         self, ref: TableReference, include_columns: bool
-    ) -> ExecuteStatementResponse:
+    ) -> StatementResponse:
         statement = f"ANALYZE TABLE {ref.schema}.{ref.table} COMPUTE STATISTICS"
         if include_columns:
             statement += " FOR ALL COLUMNS"
@@ -139,7 +138,7 @@ def _analyze_table(
         return response
 
     def _check_analyze_table_statement_status(
-        self, execute_response: ExecuteStatementResponse, max_wait_secs: int
+        self, execute_response: StatementResponse, max_wait_secs: int
     ) -> bool:
         if not execute_response.statement_id or not execute_response.status:
             return False
@@ -230,9 +229,7 @@ def _get_int(self, table_info: TableInfo, field: str) -> Optional[int]:
         return None
 
     @staticmethod
-    def _raise_if_error(
-        response: Union[ExecuteStatementResponse, GetStatementResponse], key: str
-    ) -> None:
+    def _raise_if_error(response: StatementResponse, key: str) -> None:
         if response.status and response.status.state in [
             StatementState.FAILED,
             StatementState.CANCELED,