From 3a4bdef44a6ab4675de7e220e2dad7f079ac3ec2 Mon Sep 17 00:00:00 2001 From: dushayntAW <158567391+dushayntAW@users.noreply.github.com> Date: Mon, 18 Mar 2024 20:22:17 +0530 Subject: [PATCH 1/2] fix(ingest/unity): generate sibling and lineage (#9894) --- .../databricks/unity-catalog_recipe.yml | 5 + .../datahub/ingestion/source/unity/config.py | 17 + .../datahub/ingestion/source/unity/source.py | 63 +- .../unity/test_unity_catalog_ingest.py | 107 +- .../unity/unity_catalog_mces_golden.json | 2461 +++-------------- 5 files changed, 497 insertions(+), 2156 deletions(-) diff --git a/metadata-ingestion/docs/sources/databricks/unity-catalog_recipe.yml b/metadata-ingestion/docs/sources/databricks/unity-catalog_recipe.yml index 931552e7343d0..7e0d7fcbb912b 100644 --- a/metadata-ingestion/docs/sources/databricks/unity-catalog_recipe.yml +++ b/metadata-ingestion/docs/sources/databricks/unity-catalog_recipe.yml @@ -15,6 +15,11 @@ source: deny: - ".*\\.unwanted_schema" +# emit_siblings: true +# delta_lake_options: +# platform_instance_name: null +# env: 'PROD' + # profiling: # method: "analyze" # enabled: true diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py index d933e5a5ff38e..1fdce3aa1e2d3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py @@ -60,6 +60,13 @@ class UnityCatalogProfilerConfig(ConfigModel): ) +class DeltaLakeDetails(ConfigModel): + platform_instance_name: Optional[str] = Field( + default=None, description="Delta-lake paltform instance name" + ) + env: str = Field(default="PROD", description="Delta-lake environment") + + class UnityCatalogAnalyzeProfilerConfig(UnityCatalogProfilerConfig): method: Literal["analyze"] = "analyze" @@ -253,6 +260,16 @@ class UnityCatalogSourceConfig( discriminator="method", ) + emit_siblings: bool = pydantic.Field( + default=True, + description="Whether to emit siblings relation with corresponding delta-lake platform's table. If enabled, this will also ingest the corresponding delta-lake table.", + ) + + delta_lake_options: DeltaLakeDetails = Field( + default=DeltaLakeDetails(), + description="Details about the delta lake, incase to emit siblings", + ) + scheme: str = DATABRICKS def get_sql_alchemy_url(self, database: Optional[str] = None) -> str: diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index ad5a75c4d73d4..143d8dd0e2949 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -41,7 +41,10 @@ TestConnectionReport, ) from datahub.ingestion.api.workunit import MetadataWorkUnit -from datahub.ingestion.source.aws.s3_util import make_s3_urn_for_lineage +from datahub.ingestion.source.aws.s3_util import ( + make_s3_urn_for_lineage, + strip_s3_prefix, +) from datahub.ingestion.source.common.subtypes import ( DatasetContainerSubTypes, DatasetSubTypes, @@ -80,9 +83,13 @@ ) from datahub.ingestion.source.unity.report import UnityCatalogReport from datahub.ingestion.source.unity.usage import UnityCatalogUsageExtractor +from datahub.metadata.com.linkedin.pegasus2avro.common import Siblings from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( + DatasetLineageType, FineGrainedLineage, FineGrainedLineageUpstreamType, + Upstream, + UpstreamLineage, ViewProperties, ) from datahub.metadata.schema_classes import ( @@ -491,6 +498,25 @@ def process_table(self, table: Table, schema: Schema) -> Iterable[MetadataWorkUn if table.view_definition: self.view_definitions[dataset_urn] = (table.ref, table.view_definition) + # generate sibling and lineage aspects in case of EXTERNAL DELTA TABLE + if ( + table_props.customProperties.get("table_type") == "EXTERNAL" + and table_props.customProperties.get("data_source_format") == "DELTA" + and self.config.emit_siblings + ): + storage_location = str(table_props.customProperties.get("storage_location")) + if storage_location.startswith("s3://"): + browse_path = strip_s3_prefix(storage_location) + source_dataset_urn = make_dataset_urn_with_platform_instance( + "delta-lake", + browse_path, + self.config.delta_lake_options.platform_instance_name, + self.config.delta_lake_options.env, + ) + + yield from self.gen_siblings_workunit(dataset_urn, source_dataset_urn) + yield from self.gen_lineage_workunit(dataset_urn, source_dataset_urn) + yield from [ mcp.as_workunit() for mcp in MetadataChangeProposalWrapper.construct_many( @@ -947,3 +973,38 @@ def close(self): self.sql_parser_schema_resolver.close() super().close() + + def gen_siblings_workunit( + self, + dataset_urn: str, + source_dataset_urn: str, + ) -> Iterable[MetadataWorkUnit]: + """ + Generate sibling workunit for both unity-catalog dataset and its connector source dataset + """ + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=Siblings(primary=False, siblings=[source_dataset_urn]), + ).as_workunit() + + yield MetadataChangeProposalWrapper( + entityUrn=source_dataset_urn, + aspect=Siblings(primary=True, siblings=[dataset_urn]), + ).as_workunit(is_primary_source=False) + + def gen_lineage_workunit( + self, + dataset_urn: str, + source_dataset_urn: str, + ) -> Iterable[MetadataWorkUnit]: + """ + Generate dataset to source connector lineage workunit + """ + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=UpstreamLineage( + upstreams=[ + Upstream(dataset=source_dataset_urn, type=DatasetLineageType.VIEW) + ] + ), + ).as_workunit() diff --git a/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py b/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py index 05f1db0b932f8..56c7334ea90b1 100644 --- a/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py +++ b/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py @@ -66,17 +66,6 @@ def register_mock_data(workspace_client): workspace_client.catalogs.list.return_value = [ CatalogInfo.from_dict(d) for d in [ - { - "name": "main", - "owner": "account users", - "comment": "Main catalog (auto-created)", - "metastore_id": "2c983545-d403-4f87-9063-5b7e3b6d3736", - "created_at": 1666185153376, - "created_by": "abc@acryl.io", - "updated_at": 1666186071115, - "updated_by": "abc@acryl.io", - "catalog_type": "MANAGED_CATALOG", - }, { "name": "quickstart_catalog", "owner": "account users", @@ -87,50 +76,13 @@ def register_mock_data(workspace_client): "updated_at": 1666186064332, "updated_by": "abc@acryl.io", "catalog_type": "MANAGED_CATALOG", - }, - { - "name": "system", - "owner": SERVICE_PRINCIPAL_ID_2, - "comment": "System catalog (auto-created)", - "metastore_id": "2c983545-d403-4f87-9063-5b7e3b6d3736", - "created_at": 1666185153391, - "created_by": "System user", - "updated_at": 1666185153391, - "updated_by": "System user", - "catalog_type": "SYSTEM_CATALOG", - }, + } ] ] workspace_client.schemas.list.return_value = [ SchemaInfo.from_dict(d) for d in [ - { - "name": "default", - "catalog_name": "quickstart_catalog", - "owner": "abc@acryl.io", - "comment": "Default schema (auto-created)", - "metastore_id": "2c983545-d403-4f87-9063-5b7e3b6d3736", - "full_name": "quickstart_catalog.default", - "created_at": 1666185610021, - "created_by": "abc@acryl.io", - "updated_at": 1666185610021, - "updated_by": "abc@acryl.io", - "catalog_type": "MANAGED_CATALOG", - }, - { - "name": "information_schema", - "catalog_name": "quickstart_catalog", - "owner": SERVICE_PRINCIPAL_ID_1, - "comment": "Information schema (auto-created)", - "metastore_id": "2c983545-d403-4f87-9063-5b7e3b6d3736", - "full_name": "quickstart_catalog.information_schema", - "created_at": 1666185610024, - "created_by": "System user", - "updated_at": 1666185610024, - "updated_by": "System user", - "catalog_type": "MANAGED_CATALOG", - }, { "name": "quickstart_schema", "catalog_name": "quickstart_catalog", @@ -199,7 +151,57 @@ def register_mock_data(workspace_client): "updated_by": "abc@acryl.io", "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", } - ) + ), + databricks.sdk.service.catalog.TableInfo.from_dict( + { + "name": "quickstart_table_external", + "catalog_name": "quickstart_catalog", + "schema_name": "quickstart_schema", + "table_type": "EXTERNAL", + "data_source_format": "DELTA", + "columns": [ + { + "name": "columnA", + "type_text": "int", + "type_json": '{"name":"columnA","type":"integer","nullable":true,"metadata":{}}', + "type_name": "INT", + "type_precision": 0, + "type_scale": 0, + "position": 0, + "nullable": True, + }, + { + "name": "columnB", + "type_text": "string", + "type_json": '{"name":"columnB","type":"string","nullable":true,"metadata":{}}', + "type_name": "STRING", + "type_precision": 0, + "type_scale": 0, + "position": 1, + "nullable": True, + }, + ], + "storage_location": "s3://db-02eec1f70bfe4115445be9fdb1aac6ac-s3-root-bucket/metastore/2c983545-d403-4f87-9063-5b7e3b6d3736/tables/cff27aa1-1c6a-4d78-b713-562c660c2896", + "owner": "account users", + "properties": { + "delta.lastCommitTimestamp": "1666185711000", + "delta.lastUpdateVersion": "1", + "delta.minReaderVersion": "1", + "delta.minWriterVersion": "2", + "spark.sql.statistics.numRows": "10", + "spark.sql.statistics.totalSize": "512", + }, + "generation": 2, + "metastore_id": "2c983545-d403-4f87-9063-5b7e3b6d3736", + "full_name": "quickstart_catalog.quickstart_schema.quickstart_table_external", + "data_access_configuration_id": "00000000-0000-0000-0000-000000000000", + "created_at": 1666185698688, + "created_by": "abc@acryl.io", + "updated_at": 1666186049633, + "updated_by": "abc@acryl.io", + "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", + } + ), ] workspace_client.tables.get = lambda *args, **kwargs: databricks.sdk.service.catalog.TableInfo.from_dict( @@ -409,6 +411,11 @@ def test_ingestion(pytestconfig, tmp_path, requests_mock): "include_ownership": True, "include_hive_metastore": True, "warehouse_id": "test", + "emit_siblings": True, + "delta_lake_options": { + "platform_instance_name": None, + "env": "PROD", + }, "profiling": { "enabled": True, "method": "analyze", diff --git a/metadata-ingestion/tests/integration/unity/unity_catalog_mces_golden.json b/metadata-ingestion/tests/integration/unity/unity_catalog_mces_golden.json index 7cc0f84ee5177..88aa0938942b3 100644 --- a/metadata-ingestion/tests/integration/unity/unity_catalog_mces_golden.json +++ b/metadata-ingestion/tests/integration/unity/unity_catalog_mces_golden.json @@ -213,30 +213,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,main.quickstart_schema.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1705308660413, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, - "rowCount": 10, - "columnCount": 2, - "fieldProfiles": [], - "sizeInBytes": 512 - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.view1,PROD)", @@ -750,51 +726,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,main.quickstart_schema.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "storage_location": "s3://db-02eec1f70bfe4115445be9fdb1aac6ac-s3-root-bucket/metastore/2c983545-d403-4f87-9063-5b7e3b6d3736/tables/cff27aa1-1c6a-4d78-b713-562c660c2896", - "data_source_format": "DELTA", - "generation": "2", - "table_type": "MANAGED", - "created_by": "abc@acryl.io", - "delta.lastCommitTimestamp": "1666185711000", - "delta.lastUpdateVersion": "1", - "delta.minReaderVersion": "1", - "delta.minWriterVersion": "2", - "spark.sql.statistics.numRows": "10", - "spark.sql.statistics.totalSize": "512", - "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", - "owner": "account users", - "updated_by": "abc@acryl.io", - "updated_at": "2022-10-19 13:27:29.633000+00:00", - "created_at": "2022-10-19 13:21:38.688000+00:00" - }, - "externalUrl": "https://dummy.cloud.databricks.com/explore/data/main/quickstart_schema/quickstart_table", - "name": "quickstart_table", - "qualifiedName": "main.quickstart_schema.quickstart_table", - "created": { - "time": 1666185698688, - "actor": "urn:li:corpuser:abc@acryl.io" - }, - "lastModified": { - "time": 1666186049633, - "actor": "urn:li:corpuser:abc@acryl.io" - }, - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.view1,PROD)", @@ -815,12 +746,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,main.quickstart_schema.quickstart_table,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.bet,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:c45a3b960d7503abfb5549f583eb0517" + "container": "urn:li:container:21058fb6993a790a4a43727021e52956" } }, "systemMetadata": { @@ -831,7 +762,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,main.quickstart_schema.quickstart_table,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.bet,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -847,142 +778,14 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,main.quickstart_schema.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:account users", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:045573d60442121f01b8d66a3eb95622", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "databricks", - "env": "PROD", - "catalog": "main" - }, - "externalUrl": "https://dummy.cloud.databricks.com/explore/data/main", - "name": "main", - "description": "Main catalog (auto-created)" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,main.quickstart_schema.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:045573d60442121f01b8d66a3eb95622", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:045573d60442121f01b8d66a3eb95622", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:databricks" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.bet,PROD)", "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:21058fb6993a790a4a43727021e52956" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:045573d60442121f01b8d66a3eb95622", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,main.quickstart_schema.quickstart_table,PROD)", - "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "main.quickstart_schema.quickstart_table", + "schemaName": "hive_metastore.bronze_kambi.bet", "platform": "urn:li:dataPlatform:databricks", "version": 0, "created": { @@ -1001,148 +804,29 @@ }, "fields": [ { - "fieldPath": "columnA", - "nullable": true, + "fieldPath": "betStatusId", + "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "int", + "nativeDataType": "bigint", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "columnB", - "nullable": true, + "fieldPath": "channelId", + "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "bigint", "recursive": false, "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:045573d60442121f01b8d66a3eb95622", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Catalog" - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.bet,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:045573d60442121f01b8d66a3eb95622", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:account users", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.bet,PROD)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "hive_metastore.bronze_kambi.bet", - "platform": "urn:li:dataPlatform:databricks", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "betStatusId", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "bigint", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "channelId", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "bigint", - "recursive": false, - "isPartOfKey": false - }, + }, { "fieldPath": "[version=2.0].[type=struct].[type=struct].combination", "nullable": false, @@ -1595,22 +1279,20 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,main.quickstart_schema.quickstart_table,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:730e95cd0271453376b3c1d9623838d6", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "containerProperties", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:045573d60442121f01b8d66a3eb95622", - "urn": "urn:li:container:045573d60442121f01b8d66a3eb95622" - }, - { - "id": "urn:li:container:c45a3b960d7503abfb5549f583eb0517", - "urn": "urn:li:container:c45a3b960d7503abfb5549f583eb0517" - } - ] + "customProperties": { + "platform": "databricks", + "env": "PROD", + "catalog": "quickstart_catalog" + }, + "externalUrl": "https://dummy.cloud.databricks.com/explore/data/quickstart_catalog", + "name": "quickstart_catalog", + "description": "" } }, "systemMetadata": { @@ -1620,21 +1302,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.default.quickstart_table,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:730e95cd0271453376b3c1d9623838d6", "changeType": "UPSERT", - "aspectName": "datasetProfile", + "aspectName": "status", "aspect": { "json": { - "timestampMillis": 1705308660403, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, - "rowCount": 10, - "columnCount": 2, - "fieldProfiles": [], - "sizeInBytes": 512 + "removed": false } }, "systemMetadata": { @@ -1644,42 +1318,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.default.quickstart_table,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:730e95cd0271453376b3c1d9623838d6", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "customProperties": { - "storage_location": "s3://db-02eec1f70bfe4115445be9fdb1aac6ac-s3-root-bucket/metastore/2c983545-d403-4f87-9063-5b7e3b6d3736/tables/cff27aa1-1c6a-4d78-b713-562c660c2896", - "data_source_format": "DELTA", - "generation": "2", - "table_type": "MANAGED", - "created_by": "abc@acryl.io", - "delta.lastCommitTimestamp": "1666185711000", - "delta.lastUpdateVersion": "1", - "delta.minReaderVersion": "1", - "delta.minWriterVersion": "2", - "spark.sql.statistics.numRows": "10", - "spark.sql.statistics.totalSize": "512", - "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", - "owner": "account users", - "updated_by": "abc@acryl.io", - "updated_at": "2022-10-19 13:27:29.633000+00:00", - "created_at": "2022-10-19 13:21:38.688000+00:00" - }, - "externalUrl": "https://dummy.cloud.databricks.com/explore/data/quickstart_catalog/default/quickstart_table", - "name": "quickstart_table", - "qualifiedName": "quickstart_catalog.default.quickstart_table", - "created": { - "time": 1666185698688, - "actor": "urn:li:corpuser:abc@acryl.io" - }, - "lastModified": { - "time": 1666186049633, - "actor": "urn:li:corpuser:abc@acryl.io" - }, - "tags": [] + "platform": "urn:li:dataPlatform:databricks" } }, "systemMetadata": { @@ -1689,13 +1334,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.default.quickstart_table,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:730e95cd0271453376b3c1d9623838d6", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:39e66cf95f60943f217d96142ffd9ddc" + "path": [] } }, "systemMetadata": { @@ -1705,14 +1350,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.default.quickstart_table,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:730e95cd0271453376b3c1d9623838d6", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Table" + "Catalog" ] } }, @@ -1724,31 +1369,7 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:d6f502d4d8165f68d5b594ab4cb2171e", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "databricks", - "env": "PROD", - "catalog": "main", - "unity_schema": "default" - }, - "externalUrl": "https://dummy.cloud.databricks.com/explore/data/main/default", - "name": "default", - "description": "Default schema (auto-created)" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.default.quickstart_table,PROD)", + "entityUrn": "urn:li:container:730e95cd0271453376b3c1d9623838d6", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -1759,6 +1380,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -1773,12 +1395,20 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:d6f502d4d8165f68d5b594ab4cb2171e", + "entityUrn": "urn:li:container:934b6043df189ef6dc63ac3519be34ac", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "containerProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "platform": "databricks", + "env": "PROD", + "catalog": "quickstart_catalog", + "unity_schema": "quickstart_schema" + }, + "externalUrl": "https://dummy.cloud.databricks.com/explore/data/quickstart_catalog/quickstart_schema", + "name": "quickstart_schema", + "description": "A new Unity Catalog schema called quickstart_schema" } }, "systemMetadata": { @@ -1789,12 +1419,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:d6f502d4d8165f68d5b594ab4cb2171e", + "entityUrn": "urn:li:container:934b6043df189ef6dc63ac3519be34ac", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:databricks" + "removed": false } }, "systemMetadata": { @@ -1804,13 +1434,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.default.quickstart_table,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:934b6043df189ef6dc63ac3519be34ac", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:databricks" } }, "systemMetadata": { @@ -1820,53 +1450,16 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.default.quickstart_table,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:934b6043df189ef6dc63ac3519be34ac", "changeType": "UPSERT", - "aspectName": "schemaMetadata", + "aspectName": "browsePathsV2", "aspect": { "json": { - "schemaName": "quickstart_catalog.default.quickstart_table", - "platform": "urn:li:dataPlatform:databricks", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "columnA", - "nullable": true, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - }, + "path": [ { - "fieldPath": "columnB", - "nullable": true, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false + "id": "urn:li:container:730e95cd0271453376b3c1d9623838d6", + "urn": "urn:li:container:730e95cd0271453376b3c1d9623838d6" } ] } @@ -1879,16 +1472,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:d6f502d4d8165f68d5b594ab4cb2171e", + "entityUrn": "urn:li:container:934b6043df189ef6dc63ac3519be34ac", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "subTypes", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:045573d60442121f01b8d66a3eb95622", - "urn": "urn:li:container:045573d60442121f01b8d66a3eb95622" - } + "typeNames": [ + "Schema" ] } }, @@ -1900,12 +1490,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,main.default.quickstart_table,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table_external,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:d6f502d4d8165f68d5b594ab4cb2171e" + "container": "urn:li:container:934b6043df189ef6dc63ac3519be34ac" } }, "systemMetadata": { @@ -1915,15 +1505,16 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:d6f502d4d8165f68d5b594ab4cb2171e", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table_external,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "siblings", "aspect": { "json": { - "typeNames": [ - "Schema" - ] + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:delta-lake,db-02eec1f70bfe4115445be9fdb1aac6ac-s3-root-bucket/metastore/2c983545-d403-4f87-9063-5b7e3b6d3736/tables/cff27aa1-1c6a-4d78-b713-562c660c2896,PROD)" + ], + "primary": false } }, "systemMetadata": { @@ -1934,14 +1525,15 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,main.default.quickstart_table,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,db-02eec1f70bfe4115445be9fdb1aac6ac-s3-root-bucket/metastore/2c983545-d403-4f87-9063-5b7e3b6d3736/tables/cff27aa1-1c6a-4d78-b713-562c660c2896,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "siblings", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "siblings": [ + "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table_external,PROD)" + ], + "primary": true } }, "systemMetadata": { @@ -1952,1467 +1544,20 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,main.default.quickstart_table,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table_external,PROD)", "changeType": "UPSERT", - "aspectName": "schemaMetadata", + "aspectName": "browsePathsV2", "aspect": { "json": { - "schemaName": "main.default.quickstart_table", - "platform": "urn:li:dataPlatform:databricks", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "columnA", - "nullable": true, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "columnB", - "nullable": true, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,main.default.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "storage_location": "s3://db-02eec1f70bfe4115445be9fdb1aac6ac-s3-root-bucket/metastore/2c983545-d403-4f87-9063-5b7e3b6d3736/tables/cff27aa1-1c6a-4d78-b713-562c660c2896", - "data_source_format": "DELTA", - "generation": "2", - "table_type": "MANAGED", - "created_by": "abc@acryl.io", - "delta.lastCommitTimestamp": "1666185711000", - "delta.lastUpdateVersion": "1", - "delta.minReaderVersion": "1", - "delta.minWriterVersion": "2", - "spark.sql.statistics.numRows": "10", - "spark.sql.statistics.totalSize": "512", - "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", - "owner": "account users", - "updated_by": "abc@acryl.io", - "updated_at": "2022-10-19 13:27:29.633000+00:00", - "created_at": "2022-10-19 13:21:38.688000+00:00" - }, - "externalUrl": "https://dummy.cloud.databricks.com/explore/data/main/default/quickstart_table", - "name": "quickstart_table", - "qualifiedName": "main.default.quickstart_table", - "created": { - "time": 1666185698688, - "actor": "urn:li:corpuser:abc@acryl.io" - }, - "lastModified": { - "time": 1666186049633, - "actor": "urn:li:corpuser:abc@acryl.io" - }, - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,main.default.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:045573d60442121f01b8d66a3eb95622", - "urn": "urn:li:container:045573d60442121f01b8d66a3eb95622" - }, - { - "id": "urn:li:container:d6f502d4d8165f68d5b594ab4cb2171e", - "urn": "urn:li:container:d6f502d4d8165f68d5b594ab4cb2171e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,main.default.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:account users", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:730e95cd0271453376b3c1d9623838d6", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "databricks", - "env": "PROD", - "catalog": "quickstart_catalog" - }, - "externalUrl": "https://dummy.cloud.databricks.com/explore/data/quickstart_catalog", - "name": "quickstart_catalog", - "description": "" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.default.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:730e95cd0271453376b3c1d9623838d6", - "urn": "urn:li:container:730e95cd0271453376b3c1d9623838d6" - }, - { - "id": "urn:li:container:39e66cf95f60943f217d96142ffd9ddc", - "urn": "urn:li:container:39e66cf95f60943f217d96142ffd9ddc" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:730e95cd0271453376b3c1d9623838d6", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:730e95cd0271453376b3c1d9623838d6", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:databricks" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d6f502d4d8165f68d5b594ab4cb2171e", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:abc@acryl.io", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:730e95cd0271453376b3c1d9623838d6", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a1123d3ed81951784140565f5085b96d", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:94af9873ac56d46a4bcc995836dc15d4" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:730e95cd0271453376b3c1d9623838d6", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Catalog" - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d6f502d4d8165f68d5b594ab4cb2171e", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:045573d60442121f01b8d66a3eb95622" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:730e95cd0271453376b3c1d9623838d6", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:account users", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a1123d3ed81951784140565f5085b96d", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:account users", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a1123d3ed81951784140565f5085b96d", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Schema" - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a1123d3ed81951784140565f5085b96d", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a1123d3ed81951784140565f5085b96d", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:databricks" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a1123d3ed81951784140565f5085b96d", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "databricks", - "env": "PROD", - "catalog": "system", - "unity_schema": "quickstart_schema" - }, - "externalUrl": "https://dummy.cloud.databricks.com/explore/data/system/quickstart_schema", - "name": "quickstart_schema", - "description": "A new Unity Catalog schema called quickstart_schema" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a1123d3ed81951784140565f5085b96d", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:94af9873ac56d46a4bcc995836dc15d4", - "urn": "urn:li:container:94af9873ac56d46a4bcc995836dc15d4" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:94af9873ac56d46a4bcc995836dc15d4", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:Service Principal 2", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:94af9873ac56d46a4bcc995836dc15d4", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Catalog" - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c45a3b960d7503abfb5549f583eb0517", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "databricks", - "env": "PROD", - "catalog": "main", - "unity_schema": "quickstart_schema" - }, - "externalUrl": "https://dummy.cloud.databricks.com/explore/data/main/quickstart_schema", - "name": "quickstart_schema", - "description": "A new Unity Catalog schema called quickstart_schema" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:94af9873ac56d46a4bcc995836dc15d4", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c45a3b960d7503abfb5549f583eb0517", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c45a3b960d7503abfb5549f583eb0517", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:databricks" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:94af9873ac56d46a4bcc995836dc15d4", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:databricks" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:94af9873ac56d46a4bcc995836dc15d4", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "databricks", - "env": "PROD", - "catalog": "system" - }, - "externalUrl": "https://dummy.cloud.databricks.com/explore/data/system", - "name": "system", - "description": "System catalog (auto-created)" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c45a3b960d7503abfb5549f583eb0517", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:045573d60442121f01b8d66a3eb95622", - "urn": "urn:li:container:045573d60442121f01b8d66a3eb95622" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:934b6043df189ef6dc63ac3519be34ac", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "databricks", - "env": "PROD", - "catalog": "quickstart_catalog", - "unity_schema": "quickstart_schema" - }, - "externalUrl": "https://dummy.cloud.databricks.com/explore/data/quickstart_catalog/quickstart_schema", - "name": "quickstart_schema", - "description": "A new Unity Catalog schema called quickstart_schema" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c45a3b960d7503abfb5549f583eb0517", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Schema" - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:934b6043df189ef6dc63ac3519be34ac", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:934b6043df189ef6dc63ac3519be34ac", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:databricks" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:94af9873ac56d46a4bcc995836dc15d4", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c45a3b960d7503abfb5549f583eb0517", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:account users", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:934b6043df189ef6dc63ac3519be34ac", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:730e95cd0271453376b3c1d9623838d6", - "urn": "urn:li:container:730e95cd0271453376b3c1d9623838d6" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:934b6043df189ef6dc63ac3519be34ac", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Schema" - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b84ba0cc36e3b2636942fc34ec5724d6", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:94af9873ac56d46a4bcc995836dc15d4" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:934b6043df189ef6dc63ac3519be34ac", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:account users", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:c45a3b960d7503abfb5549f583eb0517", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:045573d60442121f01b8d66a3eb95622" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:934b6043df189ef6dc63ac3519be34ac", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:730e95cd0271453376b3c1d9623838d6" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b84ba0cc36e3b2636942fc34ec5724d6", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:abc@acryl.io", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b84ba0cc36e3b2636942fc34ec5724d6", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Schema" - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b84ba0cc36e3b2636942fc34ec5724d6", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b84ba0cc36e3b2636942fc34ec5724d6", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:databricks" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b84ba0cc36e3b2636942fc34ec5724d6", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "databricks", - "env": "PROD", - "catalog": "system", - "unity_schema": "default" - }, - "externalUrl": "https://dummy.cloud.databricks.com/explore/data/system/default", - "name": "default", - "description": "Default schema (auto-created)" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b84ba0cc36e3b2636942fc34ec5724d6", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:94af9873ac56d46a4bcc995836dc15d4", - "urn": "urn:li:container:94af9873ac56d46a4bcc995836dc15d4" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:39e66cf95f60943f217d96142ffd9ddc", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:730e95cd0271453376b3c1d9623838d6" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:39e66cf95f60943f217d96142ffd9ddc", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:abc@acryl.io", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:934b6043df189ef6dc63ac3519be34ac" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:39e66cf95f60943f217d96142ffd9ddc", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Schema" - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "quickstart_catalog.quickstart_schema.quickstart_table", - "platform": "urn:li:dataPlatform:databricks", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "columnA", - "nullable": true, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "columnB", - "nullable": true, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "storage_location": "s3://db-02eec1f70bfe4115445be9fdb1aac6ac-s3-root-bucket/metastore/2c983545-d403-4f87-9063-5b7e3b6d3736/tables/cff27aa1-1c6a-4d78-b713-562c660c2896", - "data_source_format": "DELTA", - "generation": "2", - "table_type": "MANAGED", - "created_by": "abc@acryl.io", - "delta.lastCommitTimestamp": "1666185711000", - "delta.lastUpdateVersion": "1", - "delta.minReaderVersion": "1", - "delta.minWriterVersion": "2", - "spark.sql.statistics.numRows": "10", - "spark.sql.statistics.totalSize": "512", - "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", - "owner": "account users", - "updated_by": "abc@acryl.io", - "updated_at": "2022-10-19 13:27:29.633000+00:00", - "created_at": "2022-10-19 13:21:38.688000+00:00" - }, - "externalUrl": "https://dummy.cloud.databricks.com/explore/data/quickstart_catalog/quickstart_schema/quickstart_table", - "name": "quickstart_table", - "qualifiedName": "quickstart_catalog.quickstart_schema.quickstart_table", - "created": { - "time": 1666185698688, - "actor": "urn:li:corpuser:abc@acryl.io" - }, - "lastModified": { - "time": 1666186049633, - "actor": "urn:li:corpuser:abc@acryl.io" - }, - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:730e95cd0271453376b3c1d9623838d6", - "urn": "urn:li:container:730e95cd0271453376b3c1d9623838d6" - }, - { - "id": "urn:li:container:934b6043df189ef6dc63ac3519be34ac", - "urn": "urn:li:container:934b6043df189ef6dc63ac3519be34ac" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "json": { - "owners": [ - { - "owner": "urn:li:corpuser:account users", - "type": "DATAOWNER" - } - ], - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - } - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:39e66cf95f60943f217d96142ffd9ddc", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:39e66cf95f60943f217d96142ffd9ddc", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:databricks" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:39e66cf95f60943f217d96142ffd9ddc", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "databricks", - "env": "PROD", - "catalog": "quickstart_catalog", - "unity_schema": "default" - }, - "externalUrl": "https://dummy.cloud.databricks.com/explore/data/quickstart_catalog/default", - "name": "default", - "description": "Default schema (auto-created)" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:39e66cf95f60943f217d96142ffd9ddc", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:730e95cd0271453376b3c1d9623838d6", - "urn": "urn:li:container:730e95cd0271453376b3c1d9623838d6" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.bet,PROD)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1705308660402, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, - "rowCount": 3, - "columnCount": 3, - "fieldProfiles": [ + "path": [ { - "fieldPath": "betStatusId", - "uniqueCount": 1, - "uniqueProportion": 0.3333333333333333, - "nullCount": 0, - "nullProportion": 0.0 + "id": "urn:li:container:730e95cd0271453376b3c1d9623838d6", + "urn": "urn:li:container:730e95cd0271453376b3c1d9623838d6" }, { - "fieldPath": "channelId", - "uniqueCount": 1, - "uniqueProportion": 0.3333333333333333, - "nullCount": 0, - "nullProportion": 0.0 + "id": "urn:li:container:934b6043df189ef6dc63ac3519be34ac", + "urn": "urn:li:container:934b6043df189ef6dc63ac3519be34ac" } - ], - "sizeInBytes": 1024 - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.bet,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,system.quickstart_schema.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1705308660401, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, - "rowCount": 10, - "columnCount": 2, - "fieldProfiles": [], - "sizeInBytes": 512 - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,system.quickstart_schema.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "storage_location": "s3://db-02eec1f70bfe4115445be9fdb1aac6ac-s3-root-bucket/metastore/2c983545-d403-4f87-9063-5b7e3b6d3736/tables/cff27aa1-1c6a-4d78-b713-562c660c2896", - "data_source_format": "DELTA", - "generation": "2", - "table_type": "MANAGED", - "created_by": "abc@acryl.io", - "delta.lastCommitTimestamp": "1666185711000", - "delta.lastUpdateVersion": "1", - "delta.minReaderVersion": "1", - "delta.minWriterVersion": "2", - "spark.sql.statistics.numRows": "10", - "spark.sql.statistics.totalSize": "512", - "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", - "owner": "account users", - "updated_by": "abc@acryl.io", - "updated_at": "2022-10-19 13:27:29.633000+00:00", - "created_at": "2022-10-19 13:21:38.688000+00:00" - }, - "externalUrl": "https://dummy.cloud.databricks.com/explore/data/system/quickstart_schema/quickstart_table", - "name": "quickstart_table", - "qualifiedName": "system.quickstart_schema.quickstart_table", - "created": { - "time": 1666185698688, - "actor": "urn:li:corpuser:abc@acryl.io" - }, - "lastModified": { - "time": 1666186049633, - "actor": "urn:li:corpuser:abc@acryl.io" - }, - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,system.quickstart_schema.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a1123d3ed81951784140565f5085b96d" - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,system.quickstart_schema.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" ] } }, @@ -3423,8 +1568,8 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,system.quickstart_schema.quickstart_table,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:934b6043df189ef6dc63ac3519be34ac", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -3435,6 +1580,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -3449,88 +1595,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,system.quickstart_schema.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.view1,PROD)", - "changeType": "UPSERT", - "aspectName": "upstreamLineage", - "aspect": { - "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.bet,PROD)", - "type": "VIEW" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.bet,PROD),betStatusId)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.view1,PROD),betStatusId)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.bet,PROD),channelId)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.view1,PROD),channelId)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.bet,PROD),combination)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.view1,PROD),combination)" - ], - "confidenceScore": 1.0 - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,system.quickstart_schema.quickstart_table,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table_external,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "system.quickstart_schema.quickstart_table", + "schemaName": "quickstart_catalog.quickstart_schema.quickstart_table_external", "platform": "urn:li:dataPlatform:databricks", "version": 0, "created": { @@ -3568,60 +1638,11 @@ "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,system.quickstart_schema.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:94af9873ac56d46a4bcc995836dc15d4", - "urn": "urn:li:container:94af9873ac56d46a4bcc995836dc15d4" - }, - { - "id": "urn:li:container:a1123d3ed81951784140565f5085b96d", - "urn": "urn:li:container:a1123d3ed81951784140565f5085b96d" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1638860400000, - "runId": "unity-catalog-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,system.default.quickstart_table,PROD)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1705308660401, - "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" - }, - "rowCount": 10, - "columnCount": 2, - "fieldProfiles": [], - "sizeInBytes": 512 + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + } + ] } }, "systemMetadata": { @@ -3632,7 +1653,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,system.default.quickstart_table,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table_external,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -3641,7 +1662,7 @@ "storage_location": "s3://db-02eec1f70bfe4115445be9fdb1aac6ac-s3-root-bucket/metastore/2c983545-d403-4f87-9063-5b7e3b6d3736/tables/cff27aa1-1c6a-4d78-b713-562c660c2896", "data_source_format": "DELTA", "generation": "2", - "table_type": "MANAGED", + "table_type": "EXTERNAL", "created_by": "abc@acryl.io", "delta.lastCommitTimestamp": "1666185711000", "delta.lastUpdateVersion": "1", @@ -3655,9 +1676,9 @@ "updated_at": "2022-10-19 13:27:29.633000+00:00", "created_at": "2022-10-19 13:21:38.688000+00:00" }, - "externalUrl": "https://dummy.cloud.databricks.com/explore/data/system/default/quickstart_table", - "name": "quickstart_table", - "qualifiedName": "system.default.quickstart_table", + "externalUrl": "https://dummy.cloud.databricks.com/explore/data/quickstart_catalog/quickstart_schema/quickstart_table_external", + "name": "quickstart_table_external", + "qualifiedName": "quickstart_catalog.quickstart_schema.quickstart_table_external", "created": { "time": 1666185698688, "actor": "urn:li:corpuser:abc@acryl.io" @@ -3677,12 +1698,21 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,system.default.quickstart_table,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table_external,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "upstreamLineage", "aspect": { "json": { - "container": "urn:li:container:b84ba0cc36e3b2636942fc34ec5724d6" + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,db-02eec1f70bfe4115445be9fdb1aac6ac-s3-root-bucket/metastore/2c983545-d403-4f87-9063-5b7e3b6d3736/tables/cff27aa1-1c6a-4d78-b713-562c660c2896,PROD)", + "type": "VIEW" + } + ] } }, "systemMetadata": { @@ -3693,7 +1723,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,system.default.quickstart_table,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table_external,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -3709,9 +1739,25 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:934b6043df189ef6dc63ac3519be34ac", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:730e95cd0271453376b3c1d9623838d6" + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "unity-catalog-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,system.default.quickstart_table,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table_external,PROD)", "changeType": "UPSERT", "aspectName": "ownership", "aspect": { @@ -3722,6 +1768,7 @@ "type": "DATAOWNER" } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:unknown" @@ -3736,12 +1783,30 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,system.default.quickstart_table,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "container", "aspect": { "json": { - "removed": false + "container": "urn:li:container:934b6043df189ef6dc63ac3519be34ac" + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "unity-catalog-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -3752,12 +1817,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,system.default.quickstart_table,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "system.default.quickstart_table", + "schemaName": "quickstart_catalog.quickstart_schema.quickstart_table", "platform": "urn:li:dataPlatform:databricks", "version": 0, "created": { @@ -3810,19 +1875,64 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,system.default.quickstart_table,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "storage_location": "s3://db-02eec1f70bfe4115445be9fdb1aac6ac-s3-root-bucket/metastore/2c983545-d403-4f87-9063-5b7e3b6d3736/tables/cff27aa1-1c6a-4d78-b713-562c660c2896", + "data_source_format": "DELTA", + "generation": "2", + "table_type": "MANAGED", + "created_by": "abc@acryl.io", + "delta.lastCommitTimestamp": "1666185711000", + "delta.lastUpdateVersion": "1", + "delta.minReaderVersion": "1", + "delta.minWriterVersion": "2", + "spark.sql.statistics.numRows": "10", + "spark.sql.statistics.totalSize": "512", + "table_id": "cff27aa1-1c6a-4d78-b713-562c660c2896", + "owner": "account users", + "updated_by": "abc@acryl.io", + "updated_at": "2022-10-19 13:27:29.633000+00:00", + "created_at": "2022-10-19 13:21:38.688000+00:00" + }, + "externalUrl": "https://dummy.cloud.databricks.com/explore/data/quickstart_catalog/quickstart_schema/quickstart_table", + "name": "quickstart_table", + "qualifiedName": "quickstart_catalog.quickstart_schema.quickstart_table", + "created": { + "time": 1666185698688, + "actor": "urn:li:corpuser:abc@acryl.io" + }, + "lastModified": { + "time": 1666186049633, + "actor": "urn:li:corpuser:abc@acryl.io" + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "unity-catalog-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:94af9873ac56d46a4bcc995836dc15d4", - "urn": "urn:li:container:94af9873ac56d46a4bcc995836dc15d4" + "id": "urn:li:container:730e95cd0271453376b3c1d9623838d6", + "urn": "urn:li:container:730e95cd0271453376b3c1d9623838d6" }, { - "id": "urn:li:container:b84ba0cc36e3b2636942fc34ec5724d6", - "urn": "urn:li:container:b84ba0cc36e3b2636942fc34ec5724d6" + "id": "urn:li:container:934b6043df189ef6dc63ac3519be34ac", + "urn": "urn:li:container:934b6043df189ef6dc63ac3519be34ac" } ] } @@ -3835,7 +1945,33 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,main.default.quickstart_table,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:account users", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "unity-catalog-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.bet,PROD)", "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { @@ -3845,10 +1981,25 @@ "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" }, - "rowCount": 10, - "columnCount": 2, - "fieldProfiles": [], - "sizeInBytes": 512 + "rowCount": 3, + "columnCount": 3, + "fieldProfiles": [ + { + "fieldPath": "betStatusId", + "uniqueCount": 1, + "uniqueProportion": 0.3333333333333333, + "nullCount": 0, + "nullProportion": 0.0 + }, + { + "fieldPath": "channelId", + "uniqueCount": 1, + "uniqueProportion": 0.3333333333333333, + "nullCount": 0, + "nullProportion": 0.0 + } + ], + "sizeInBytes": 1024 } }, "systemMetadata": { @@ -3859,7 +2010,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,main.default.quickstart_table,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.bet,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -3873,6 +2024,66 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.bet,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.bet,PROD),betStatusId)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.view1,PROD),betStatusId)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.bet,PROD),channelId)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.view1,PROD),channelId)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.bet,PROD),combination)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.view1,PROD),combination)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "unity-catalog-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,hive_metastore.bronze_kambi.view1,PROD)", @@ -3928,5 +2139,45 @@ "runId": "unity-catalog-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table_external,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProfile", + "aspect": { + "json": { + "timestampMillis": 1708671016385, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "rowCount": 10, + "columnCount": 2, + "fieldProfiles": [], + "sizeInBytes": 512 + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "unity-catalog-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:databricks,quickstart_catalog.quickstart_schema.quickstart_table_external,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "unity-catalog-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file From 104e78776dfb9cd3b9a10063aadb54dcf14d65d1 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 18 Mar 2024 13:59:01 -0700 Subject: [PATCH 2/2] fix(ingest): only auto-enable stateful ingestion if pipeline name is set (#10075) --- docs/how/updating-datahub.md | 2 +- .../ingestion/source/state/stateful_ingestion_base.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 9d46fe606fa56..cc8de2b541ce2 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -20,7 +20,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Breaking Changes -- #9934 - Stateful ingestion is now enabled by default if datahub-rest sink is used or if a `datahub_api` is specified. It will still be disabled by default when any other sink type is used. +- #9934 and #10075 - Stateful ingestion is now enabled by default if a `pipeline_name` is set and either a datahub-rest sink or `datahub_api` is specified. It will still be disabled by default when any other sink type is used or if there is no pipeline name set. - #10002 - The `DataHubGraph` client no longer makes a request to the backend during initialization. If you want to preserve the old behavior, call `graph.test_connection()` after constructing the client. ### Potential Downtime diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py index 521f8f5ee07d8..4e9e1425a9ae0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py @@ -53,7 +53,7 @@ class StatefulIngestionConfig(ConfigModel): enabled: bool = Field( default=False, description="Whether or not to enable stateful ingest. " - "Default: True if datahub-rest sink is used or if a `datahub_api` is specified, otherwise False", + "Default: True if a pipeline_name is set and either a datahub-rest sink or `datahub_api` is specified, otherwise False", ) max_checkpoint_state_size: pydantic.PositiveInt = Field( default=2**24, # 16 MB @@ -233,9 +233,13 @@ def _initialize_checkpointing_state_provider(self) -> None: IngestionCheckpointingProviderBase ] = None - if self.stateful_ingestion_config is None and self.ctx.graph: + if ( + self.stateful_ingestion_config is None + and self.ctx.graph + and self.ctx.pipeline_name + ): logger.info( - "Stateful ingestion got enabled by default, as datahub-rest sink is used or `datahub_api` is specified" + "Stateful ingestion will be automatically enabled, as datahub-rest sink is used or `datahub_api` is specified" ) self.stateful_ingestion_config = StatefulIngestionConfig( enabled=True,