From 50139431bef0113145310910e24caf5b18c17e77 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 2 Aug 2024 15:48:10 -0700 Subject: [PATCH 1/9] fix(ingest): set lastObserved in sdk when unset (#11071) --- metadata-ingestion/src/datahub/cli/cli_utils.py | 3 +++ metadata-ingestion/tests/unit/test_rest_sink.py | 12 ++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index 21841b173c23d..1b9cccb1cbc21 100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -1,5 +1,6 @@ import json import logging +import time import typing from datetime import datetime from typing import Any, Dict, List, Optional, Tuple, Type, Union @@ -403,6 +404,8 @@ def ensure_has_system_metadata( if event.systemMetadata is None: event.systemMetadata = SystemMetadataClass() metadata = event.systemMetadata + if metadata.lastObserved == 0: + metadata.lastObserved = int(time.time() * 1000) if metadata.properties is None: metadata.properties = {} props = metadata.properties diff --git a/metadata-ingestion/tests/unit/test_rest_sink.py b/metadata-ingestion/tests/unit/test_rest_sink.py index efa6c6678a8c7..a76f96039c2c7 100644 --- a/metadata-ingestion/tests/unit/test_rest_sink.py +++ b/metadata-ingestion/tests/unit/test_rest_sink.py @@ -1,7 +1,9 @@ import json +from datetime import datetime, timezone import pytest import requests +from freezegun import freeze_time import datahub.metadata.schema_classes as models from datahub.emitter.mcp import MetadataChangeProposalWrapper @@ -9,6 +11,7 @@ MOCK_GMS_ENDPOINT = "http://fakegmshost:8080" +FROZEN_TIME = 1618987484580 basicAuditStamp = models.AuditStampClass( time=1618987484580, actor="urn:li:corpuser:datahub", @@ -76,7 +79,7 @@ } }, "systemMetadata": { - "lastObserved": 0, + "lastObserved": FROZEN_TIME, "lastRunId": "no-run-id-provided", "properties": { "clientId": "acryl-datahub", @@ -134,7 +137,7 @@ } }, "systemMetadata": { - "lastObserved": 0, + "lastObserved": FROZEN_TIME, "lastRunId": "no-run-id-provided", "properties": { "clientId": "acryl-datahub", @@ -178,7 +181,7 @@ } }, "systemMetadata": { - "lastObserved": 0, + "lastObserved": FROZEN_TIME, "lastRunId": "no-run-id-provided", "properties": { "clientId": "acryl-datahub", @@ -263,7 +266,7 @@ "contentType": "application/json", }, "systemMetadata": { - "lastObserved": 0, + "lastObserved": FROZEN_TIME, "lastRunId": "no-run-id-provided", "properties": { "clientId": "acryl-datahub", @@ -276,6 +279,7 @@ ), ], ) +@freeze_time(datetime.fromtimestamp(FROZEN_TIME / 1000, tz=timezone.utc)) def test_datahub_rest_emitter(requests_mock, record, path, snapshot): def match_request_text(request: requests.Request) -> bool: requested_snapshot = request.json() From 6704d444704c11239b6a3ee0075fd5045c3e2f89 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Sat, 3 Aug 2024 05:58:20 +0200 Subject: [PATCH 2/9] doc(ingest): Update capabilities (#11072) --- .../datahub/ingestion/source/abs/source.py | 5 ----- .../datahub/ingestion/source/csv_enricher.py | 7 ++++++- .../ingestion/source/dynamodb/dynamodb.py | 5 ----- .../src/datahub/ingestion/source/feast.py | 2 ++ .../datahub/ingestion/source/kafka_connect.py | 1 + .../src/datahub/ingestion/source/mode.py | 4 ++++ .../ingestion/source/powerbi/powerbi.py | 4 ++++ .../src/datahub/ingestion/source/pulsar.py | 1 + .../ingestion/source/qlik_sense/qlik_sense.py | 10 ++++++++++ .../src/datahub/ingestion/source/redash.py | 4 +++- .../ingestion/source/redshift/redshift.py | 4 +++- .../src/datahub/ingestion/source/s3/source.py | 7 +++---- .../datahub/ingestion/source/salesforce.py | 8 ++++++++ .../datahub/ingestion/source/sigma/sigma.py | 4 ++++ .../ingestion/source/sql/sql_common.py | 20 +++++++++++++++++++ .../src/datahub/ingestion/source/superset.py | 1 + .../datahub/ingestion/source/unity/source.py | 3 +++ 17 files changed, 73 insertions(+), 17 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py index c9833f6982599..39ebd79c2e226 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py @@ -198,11 +198,6 @@ class TableData: @support_status(SupportStatus.INCUBATING) @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") @capability(SourceCapability.TAGS, "Can extract ABS object/container tags if enabled") -@capability( - SourceCapability.DELETION_DETECTION, - "Optionally enabled via `stateful_ingestion.remove_stale_metadata`", - supported=True, -) class ABSSource(StatefulIngestionSourceBase): source_config: DataLakeSourceConfig report: DataLakeSourceReport diff --git a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py index feee89ba57983..e3f9a150ad000 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py +++ b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py @@ -12,11 +12,12 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, + capability, config_class, platform_name, support_status, ) -from datahub.ingestion.api.source import Source, SourceReport +from datahub.ingestion.api.source import Source, SourceCapability, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source_config.csv_enricher import CSVEnricherConfig from datahub.metadata.schema_classes import ( @@ -96,6 +97,10 @@ class CSVEnricherReport(SourceReport): @platform_name("CSV Enricher") @config_class(CSVEnricherConfig) @support_status(SupportStatus.INCUBATING) +@capability(SourceCapability.DOMAINS, "Supported by default") +@capability(SourceCapability.TAGS, "Supported by default") +@capability(SourceCapability.DESCRIPTIONS, "Supported by default") +@capability(SourceCapability.OWNERSHIP, "Supported by default") class CSVEnricherSource(Source): """ :::tip Looking to ingest a CSV data file into DataHub, as an asset? diff --git a/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py b/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py index 6cab0ffc8f25c..acda656526ef5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py @@ -166,11 +166,6 @@ def report_dropped(self, name: str) -> None: SourceCapability.PLATFORM_INSTANCE, "By default, platform_instance will use the AWS account id", ) -@capability( - SourceCapability.DELETION_DETECTION, - "Optionally enabled via `stateful_ingestion.remove_stale_metadata`", - supported=True, -) class DynamoDBSource(StatefulIngestionSourceBase): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/feast.py b/metadata-ingestion/src/datahub/ingestion/source/feast.py index db0c8e9c39e7b..e097fd1f221ea 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/feast.py +++ b/metadata-ingestion/src/datahub/ingestion/source/feast.py @@ -96,6 +96,8 @@ class FeastRepositorySourceConfig(ConfigModel): @platform_name("Feast") @config_class(FeastRepositorySourceConfig) @support_status(SupportStatus.CERTIFIED) +@capability(SourceCapability.DESCRIPTIONS, "Enabled by default") +@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") @capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") @dataclass class FeastRepositorySource(Source): diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py index 17047457e0eba..266f9f6db5762 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py @@ -1125,6 +1125,7 @@ def transform_connector_config( @config_class(KafkaConnectSourceConfig) @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") @capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") class KafkaConnectSource(StatefulIngestionSourceBase): config: KafkaConnectSourceConfig diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index 3da7f98e93008..2f044bdcfcd21 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -210,8 +210,12 @@ def report_dropped_space(self, ent_name: str) -> None: @platform_name("Mode") @config_class(ModeConfig) @support_status(SupportStatus.CERTIFIED) +@capability(SourceCapability.CONTAINERS, "Enabled by default") +@capability(SourceCapability.DESCRIPTIONS, "Enabled by default") @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") @capability(SourceCapability.LINEAGE_COARSE, "Supported by default") +@capability(SourceCapability.LINEAGE_FINE, "Supported by default") +@capability(SourceCapability.OWNERSHIP, "Enabled by default") class ModeSource(StatefulIngestionSourceBase): """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 73f242a06b1d6..e0a72c71a1ef0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -1220,8 +1220,12 @@ def report_to_datahub_work_units( @platform_name("PowerBI") @config_class(PowerBiDashboardSourceConfig) @support_status(SupportStatus.CERTIFIED) +@capability(SourceCapability.CONTAINERS, "Enabled by default") @capability(SourceCapability.DESCRIPTIONS, "Enabled by default") +@capability(SourceCapability.OWNERSHIP, "Enabled by default") @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") +@capability(SourceCapability.TAGS, "Enabled by default") @capability( SourceCapability.OWNERSHIP, "Disabled by default, configured using `extract_ownership`", diff --git a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py index 7671e23928430..790c1f918cdfd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py +++ b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py @@ -91,6 +91,7 @@ def __init__(self, schema): @config_class(PulsarSourceConfig) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field") +@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") @dataclass class PulsarSource(StatefulIngestionSourceBase): def __init__(self, config: PulsarSourceConfig, ctx: PipelineContext): diff --git a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py index b9fd2a9c4fe22..b6c48dd3c488e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py +++ b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py @@ -93,12 +93,22 @@ @platform_name("Qlik Sense") @config_class(QlikSourceConfig) @support_status(SupportStatus.INCUBATING) +@capability(SourceCapability.CONTAINERS, "Enabled by default") @capability(SourceCapability.DESCRIPTIONS, "Enabled by default") +@capability( + SourceCapability.LINEAGE_COARSE, + "Enabled by default.", +) +@capability( + SourceCapability.LINEAGE_FINE, + "Disabled by default. ", +) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") @capability( SourceCapability.OWNERSHIP, "Enabled by default, configured using `ingest_owner`", ) +@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") class QlikSenseSource(StatefulIngestionSourceBase, TestableSource): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/redash.py b/metadata-ingestion/src/datahub/ingestion/source/redash.py index c7a3f25e947dc..38cf0bebcbc12 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redash.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redash.py @@ -18,12 +18,13 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( # SourceCapability,; capability, SupportStatus, + capability, config_class, platform_name, support_status, ) from datahub.ingestion.api.registry import import_path -from datahub.ingestion.api.source import Source, SourceReport +from datahub.ingestion.api.source import Source, SourceCapability, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.metadata.com.linkedin.pegasus2avro.common import ( AuditStamp, @@ -308,6 +309,7 @@ def report_dropped(self, item: str) -> None: @platform_name("Redash") @config_class(RedashConfig) @support_status(SupportStatus.INCUBATING) +@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") class RedashSource(Source): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index a6ffed65aaa70..a9fc9ab8f3e99 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -120,7 +120,7 @@ @platform_name("Redshift") @config_class(RedshiftConfig) @support_status(SupportStatus.CERTIFIED) -@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.CONTAINERS, "Enabled by default") @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field") @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") @capability(SourceCapability.DESCRIPTIONS, "Enabled by default") @@ -129,6 +129,8 @@ SourceCapability.LINEAGE_FINE, "Optionally enabled via configuration (`mixed` or `sql_based` lineage needs to be enabled)", ) +@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") @capability( SourceCapability.USAGE_STATS, "Enabled by default, can be disabled via configuration `include_usage_statistics`", diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index c35f500df1b8c..b8c7fd5aa88fc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -220,13 +220,12 @@ class TableData: @platform_name("S3 / Local Files", id="s3") @config_class(DataLakeSourceConfig) @support_status(SupportStatus.INCUBATING) +@capability(SourceCapability.CONTAINERS, "Enabled by default") @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") -@capability(SourceCapability.TAGS, "Can extract S3 object/bucket tags if enabled") @capability( - SourceCapability.DELETION_DETECTION, - "Optionally enabled via `stateful_ingestion.remove_stale_metadata`", - supported=True, + SourceCapability.SCHEMA_METADATA, "Can infer schema from supported file types" ) +@capability(SourceCapability.TAGS, "Can extract S3 object/bucket tags if enabled") class S3Source(StatefulIngestionSourceBase): source_config: DataLakeSourceConfig report: DataLakeSourceReport diff --git a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py index 946fdcedc571f..42128123c6144 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py +++ b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py @@ -199,6 +199,14 @@ def report_dropped(self, ent_name: str) -> None: description="Not supported yet", supported=False, ) +@capability( + capability_name=SourceCapability.SCHEMA_METADATA, + description="Enabled by default", +) +@capability( + capability_name=SourceCapability.TAGS, + description="Enabled by default", +) class SalesforceSource(Source): base_url: str config: SalesforceConfig diff --git a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py index 74d7abb121a3e..5db5e543510db 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py @@ -86,8 +86,12 @@ @platform_name("Sigma") @config_class(SigmaSourceConfig) @support_status(SupportStatus.INCUBATING) +@capability(SourceCapability.CONTAINERS, "Enabled by default") @capability(SourceCapability.DESCRIPTIONS, "Enabled by default") +@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default.") @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") +@capability(SourceCapability.TAGS, "Enabled by default") @capability( SourceCapability.OWNERSHIP, "Enabled by default, configured using `ingest_owner`", diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index b3a5f134c61d6..de3012cc33568 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -320,6 +320,26 @@ class ProfileMetadata: "Optionally enabled via `classification.enabled`", supported=True, ) +@capability( + SourceCapability.SCHEMA_METADATA, + "Enabled by default", + supported=True, +) +@capability( + SourceCapability.CONTAINERS, + "Enabled by default", + supported=True, +) +@capability( + SourceCapability.DESCRIPTIONS, + "Enabled by default", + supported=True, +) +@capability( + SourceCapability.DOMAINS, + "Enabled by default", + supported=True, +) class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource): """A Base class for all SQL Sources that use SQLAlchemy to extend""" diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index dd2dc3301d80e..0656f13feba64 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -158,6 +158,7 @@ def get_filter_name(filter_obj): @capability( SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion" ) +@capability(SourceCapability.DOMAINS, "Enabled by `domain` config to assign domain_key") @capability(SourceCapability.LINEAGE_COARSE, "Supported by default") class SupersetSource(StatefulIngestionSourceBase): """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index b29170cb2d705..9a6cde78cf10d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -140,6 +140,9 @@ @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field") @capability(SourceCapability.CONTAINERS, "Enabled by default") @capability(SourceCapability.OWNERSHIP, "Supported via the `include_ownership` config") +@capability( + SourceCapability.DATA_PROFILING, "Supported via the `profiling.enabled` config" +) @capability( SourceCapability.DELETION_DETECTION, "Optionally enabled via `stateful_ingestion.remove_stale_metadata`", From a8ef7b68c85f7bda9810a621fd4938184ff8d98b Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Sat, 3 Aug 2024 18:30:16 +0530 Subject: [PATCH 3/9] chore(vulnerability): Log Injection (#11090) --- datahub-frontend/app/controllers/SsoCallbackController.java | 3 ++- .../src/main/java/com/datahub/graphql/GraphQLController.java | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/datahub-frontend/app/controllers/SsoCallbackController.java b/datahub-frontend/app/controllers/SsoCallbackController.java index 5e30bf976b819..750886570bf40 100644 --- a/datahub-frontend/app/controllers/SsoCallbackController.java +++ b/datahub-frontend/app/controllers/SsoCallbackController.java @@ -66,7 +66,8 @@ public SsoCallbackController( public CompletionStage handleCallback(String protocol, Http.Request request) { if (shouldHandleCallback(protocol)) { - log.debug(String.format("Handling SSO callback. Protocol: %s", protocol)); + log.debug("Handling SSO callback. Protocol: {}", + _ssoManager.getSsoProvider().protocol().getCommonName()); return callback(request) .handle( (res, e) -> { diff --git a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java index 2f383b1956313..f04f5f80d03a4 100644 --- a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java +++ b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java @@ -75,7 +75,7 @@ CompletableFuture> postGraphQL( try { bodyJson = mapper.readTree(jsonStr); } catch (JsonProcessingException e) { - log.error("Failed to parse json {}", jsonStr); + log.error("Failed to parse json ", e); return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } From b544c0a3b0c689b6620d87f04b58b81b3fe74dec Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Sat, 3 Aug 2024 18:31:05 +0530 Subject: [PATCH 4/9] chore(vulnerability): Information exposure through a stack trace (#11091) --- .../auth/authentication/filter/AuthenticationFilter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java b/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java index dd68e4d36f14e..ee2efd2ae9536 100644 --- a/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java +++ b/metadata-service/auth-filter/src/main/java/com/datahub/auth/authentication/filter/AuthenticationFilter.java @@ -92,7 +92,7 @@ public void doFilter(ServletRequest request, ServletResponse response, FilterCha "Failed to authenticate request. Received an AuthenticationExpiredException from authenticator chain.", e); ((HttpServletResponse) response) - .sendError(HttpServletResponse.SC_UNAUTHORIZED, e.getMessage()); + .sendError(HttpServletResponse.SC_UNAUTHORIZED, "Unauthorized to perform this action."); return; } From c572d39bbf3f6bd49478a47735679297a3ca43d3 Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Sat, 3 Aug 2024 18:32:01 +0530 Subject: [PATCH 5/9] chore(vulnerability): Comparison of narrow type with wide type in loop condition (#11089) --- .../com/linkedin/metadata/models/extractor/FieldExtractor.java | 2 +- .../metadata/boot/steps/IngestDataPlatformInstancesStep.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java b/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java index 899f66e66ea5a..bef7782d8f7c9 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/extractor/FieldExtractor.java @@ -63,7 +63,7 @@ public static Map> extractFields( } else { List valueList = (List) value.get(); // If the field is a nested list of values, flatten it - for (int i = 0; i < numArrayWildcards - 1; i++) { + for (long i = 0; i < numArrayWildcards - 1; i++) { valueList = valueList.stream() .flatMap(v -> ((List) v).stream()) diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java index 638d1a6fd0c8a..22ce06a5984ea 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java @@ -61,7 +61,7 @@ public void execute(@Nonnull OperationContext systemOperationContext) throws Exc long numEntities = _migrationsDao.countEntities(); int start = 0; - while (start < numEntities) { + while (start < (int) numEntities) { log.info( "Reading urns {} to {} from the aspects table to generate dataplatform instance aspects", start, From aeef69cbafa72425c303f37cc74c114eeb59d5da Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Sat, 3 Aug 2024 18:32:43 +0530 Subject: [PATCH 6/9] chore(vulnerability): Insertion of sensitive information into log files (#11088) --- .../graphql/resolvers/auth/RevokeAccessTokenResolver.java | 2 +- .../resolvers/ingest/secret/DeleteSecretResolver.java | 8 ++++---- .../main/java/com/linkedin/metadata/utils/SearchUtil.java | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java index 53ae6d4509e7d..eb15208769902 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/RevokeAccessTokenResolver.java @@ -41,7 +41,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw final QueryContext context = environment.getContext(); final String tokenId = bindArgument(environment.getArgument("tokenId"), String.class); - log.info("User {} revoking access token {}", context.getActorUrn(), tokenId); + log.info("User {} revoking access token", context.getActorUrn()); if (isAuthorizedToRevokeToken(context, tokenId)) { try { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java index f557b9889f604..da81d560c6dbd 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/DeleteSecretResolver.java @@ -23,16 +23,16 @@ public DeleteSecretResolver(final EntityClient entityClient) { public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); if (IngestionAuthUtils.canManageSecrets(context)) { - final String secretUrn = environment.getArgument("urn"); - final Urn urn = Urn.createFromString(secretUrn); + final String inputUrn = environment.getArgument("urn"); + final Urn urn = Urn.createFromString(inputUrn); return GraphQLConcurrencyUtils.supplyAsync( () -> { try { _entityClient.deleteEntity(context.getOperationContext(), urn); - return secretUrn; + return inputUrn; } catch (Exception e) { throw new RuntimeException( - String.format("Failed to perform delete against secret with urn %s", secretUrn), + String.format("Failed to perform delete against secret with urn %s", inputUrn), e); } }, diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/SearchUtil.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/SearchUtil.java index c3c9cac6280ed..aa18124c826da 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/SearchUtil.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/SearchUtil.java @@ -54,10 +54,10 @@ public static List convertToFilters( public static FilterValue createFilterValue(String value, Long facetCount, Boolean isFilteredOn) { // TODO(indy): test this - String[] aggregationTokens = value.split(AGGREGATION_SEPARATOR_CHAR); + String[] aggregations = value.split(AGGREGATION_SEPARATOR_CHAR); FilterValue result = new FilterValue().setValue(value).setFacetCount(facetCount).setFiltered(isFilteredOn); - String lastValue = aggregationTokens[aggregationTokens.length - 1]; + String lastValue = aggregations[aggregations.length - 1]; if (lastValue.startsWith(URN_PREFIX)) { try { result.setEntity(Urn.createFromString(lastValue)); From 2c9e3b74ed073f033bb858ecc3cc6028ff64ebc2 Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Sat, 3 Aug 2024 18:35:30 +0530 Subject: [PATCH 7/9] chore(vulnerability): Risky Cryptographic Algorithm (#11059) --- .../datahub/graphql/resolvers/ingest/secret/SecretUtils.java | 4 ++-- .../io/datahubproject/metadata/services/SecretService.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/SecretUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/SecretUtils.java index 225a5801adec9..87a3e5cb79ebf 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/SecretUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/secret/SecretUtils.java @@ -25,7 +25,7 @@ static String encrypt(String value, String secret) { } catch (NoSuchAlgorithmException e) { e.printStackTrace(); } - Cipher cipher = Cipher.getInstance("AES/ECB/PKCS5Padding"); + Cipher cipher = Cipher.getInstance("AES"); cipher.init(Cipher.ENCRYPT_MODE, secretKey); return Base64.getEncoder() .encodeToString(cipher.doFinal(value.getBytes(StandardCharsets.UTF_8))); @@ -48,7 +48,7 @@ static String decrypt(String encryptedValue, String secret) { } catch (NoSuchAlgorithmException e) { e.printStackTrace(); } - Cipher cipher = Cipher.getInstance("AES/ECB/PKCS5PADDING"); + Cipher cipher = Cipher.getInstance("AES"); cipher.init(Cipher.DECRYPT_MODE, secretKey); return new String(cipher.doFinal(Base64.getDecoder().decode(encryptedValue))); } catch (Exception e) { diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/services/SecretService.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/services/SecretService.java index bea03235abfb4..48e7f80173cfd 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/services/SecretService.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/services/SecretService.java @@ -50,7 +50,7 @@ public String encrypt(String value) { } catch (NoSuchAlgorithmException e) { e.printStackTrace(); } - Cipher cipher = Cipher.getInstance("AES/ECB/PKCS5Padding"); + Cipher cipher = Cipher.getInstance("AES"); cipher.init(Cipher.ENCRYPT_MODE, secretKey); return _encoder.encodeToString(cipher.doFinal(value.getBytes(StandardCharsets.UTF_8))); } catch (Exception e) { @@ -72,7 +72,7 @@ public String decrypt(String encryptedValue) { } catch (NoSuchAlgorithmException e) { e.printStackTrace(); } - Cipher cipher = Cipher.getInstance("AES/ECB/PKCS5PADDING"); + Cipher cipher = Cipher.getInstance("AES"); cipher.init(Cipher.DECRYPT_MODE, secretKey); return new String(cipher.doFinal(_decoder.decode(encryptedValue))); } catch (Exception e) { From 210b31135c0ff8508a136525a4299794eb3a1c1b Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Sat, 3 Aug 2024 18:39:40 +0530 Subject: [PATCH 8/9] chore(vulnerability): Overly permissive regex range (#11061) Co-authored-by: Harshal Sheth --- metadata-ingestion/src/datahub/ingestion/source/mode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index 2f044bdcfcd21..730a3521bd192 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -760,7 +760,7 @@ def _replace_definitions(self, raw_query: str) -> str: def _parse_definition_name(self, definition_variable: str) -> Tuple[str, str]: name, alias = "", "" # i.e '{{ @join_on_definition as alias}}' - name_match = re.findall("@[a-zA-z]+", definition_variable) + name_match = re.findall("@[a-zA-Z]+", definition_variable) if len(name_match): name = name_match[0][1:] alias_match = re.findall( From 903ec87356f4f41aa29ad76eff8261aad6973e5b Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Sun, 4 Aug 2024 02:57:37 +0900 Subject: [PATCH 9/9] fix: update customer data (#11075) --- .../cloud/CompanyLogos/customersData.json | 74 ++++++++++++++----- .../src/pages/cloud/CompanyLogos/index.js | 2 +- .../cloud/CompanyLogos/logos.module.scss | 2 +- 3 files changed, 57 insertions(+), 21 deletions(-) diff --git a/docs-website/src/pages/cloud/CompanyLogos/customersData.json b/docs-website/src/pages/cloud/CompanyLogos/customersData.json index 09943e8ef172f..e8a7470eab470 100644 --- a/docs-website/src/pages/cloud/CompanyLogos/customersData.json +++ b/docs-website/src/pages/cloud/CompanyLogos/customersData.json @@ -2,38 +2,50 @@ "customers": [ { "link": { - "href": "https://robinhood.com", + "href": "https://www.depop.com/", "blank": true }, "logo": { "asset": { - "_ref": "/img/logos/scrollingCompanies/acertus.webp" + "_ref": "https://cdn.sanity.io/images/cqo9wkgf/production/345d124a249d43bf0f20b608f8bfa2f7683311fa-360x180.png" }, - "alt": "Robinhood" + "alt": "depop" } }, { "link": { - "href": "https://www.dpgmediagroup.com/", + "href": "https://riskified.com", "blank": true }, "logo": { "asset": { - "_ref": "/img/logos/scrollingCompanies/autoscout24.webp" + "_ref": "https://cdn.sanity.io/images/cqo9wkgf/production/c982e0459bed565273a9b696d9d40aed76f84b1e-360x180.png" }, - "alt": "DPG Media" + "alt": "Riskified" } }, { "link": { - "href": "https://www.twilio.com", + "href": "https://get.betterup.com/", "blank": true }, "logo": { "asset": { - "_ref": "/img/logos/scrollingCompanies/betterup.webp" + "_ref": "https://cdn.sanity.io/images/cqo9wkgf/production/5988a55b3c090a12ddc3f3cae07b290ac3134771-360x180.png" }, - "alt": "Twilio" + "alt": "Betterup" + } + }, + { + "link": { + "href": "https://www.ovoenergy.com/", + "blank": true + }, + "logo": { + "asset": { + "_ref": "https://cdn.sanity.io/images/cqo9wkgf/production/5e7bd32dfbc769849dca136947ebd2fc2f5e91f3-540x270.png" + }, + "alt": "OVO Energy" } }, { @@ -43,45 +55,69 @@ }, "logo": { "asset": { - "_ref": "/img/logos/scrollingCompanies/depop.webp" + "_ref": "https://cdn.sanity.io/images/cqo9wkgf/production/3d7c10e1bd7c7a062250e092d6d9d0553fb57790-360x180.png" }, "alt": "Myob" } }, { "link": { - "href": "https://regeneron.com", + "href": "https://www.dpgmediagroup.com/", "blank": true }, "logo": { "asset": { - "_ref": "/img/logos/scrollingCompanies/dpg_media.webp" + "_ref": "https://cdn.sanity.io/images/cqo9wkgf/production/b446f595b4b13a72ee82a285924715f950e012ca-540x270.png" }, - "alt": "Regeneron" + "alt": "DPG Megia" } }, { "link": { - "href": "https://riskified.com", + "href": "https://www.notion.so/", "blank": true }, "logo": { "asset": { - "_ref": "/img/logos/scrollingCompanies/myob.webp" + "_ref": "https://cdn.sanity.io/images/cqo9wkgf/production/c2e84f93572cd1baf30ea7ab8da234ff44182eb6-540x270.png" }, - "alt": "Riskified" + "alt": "Notion" + } + }, + { + "link": { + "href": "https://www.sae.org/", + "blank": true + }, + "logo": { + "asset": { + "_ref": "https://cdn.sanity.io/images/cqo9wkgf/production/a9e8586635cb4039cbfc5836a6a5cacdeba9e6b3-540x270.png" + }, + "alt": "SAE International" + } + }, + { + "link": { + "href": "https://viator.com", + "blank": true + }, + "logo": { + "asset": { + "_ref": "https://cdn.sanity.io/images/cqo9wkgf/production/0c17141cad5baa053da18dffb17d9c182d242e69-1200x475.png" + }, + "alt": "Viator" } }, { "link": { - "href": "https://xero.com", + "href": "https://www.tripadvisor.co.uk/", "blank": true }, "logo": { "asset": { - "_ref": "/img/logos/scrollingCompanies/notion.webp" + "_ref": "https://cdn.sanity.io/images/cqo9wkgf/production/28255a28d261a074a83d1ee8632f0338bf5cf57e-1112x256.png" }, - "alt": "Xero" + "alt": "Trip Advisor" } } ] diff --git a/docs-website/src/pages/cloud/CompanyLogos/index.js b/docs-website/src/pages/cloud/CompanyLogos/index.js index cd3a79f277893..d395d5b50e48a 100644 --- a/docs-website/src/pages/cloud/CompanyLogos/index.js +++ b/docs-website/src/pages/cloud/CompanyLogos/index.js @@ -32,7 +32,7 @@ const ScrollingCustomers = ({ noOverlay = true, spacing, ...rest }) => { to={customer.link.href} target={customer.link.blank ? '_blank' : '_self'} rel={customer.link.blank ? 'noopener noreferrer' : ''} - style={{ minWidth: 'max-content', padding: '0 3.25rem' }} + style={{ minWidth: 'max-content', padding: '0 1.8rem' }} >