From 5b7487db80e0e9dfc12da6f0fac398deab00d3b4 Mon Sep 17 00:00:00 2001
From: RyanHolstien
Date: Wed, 6 Sep 2023 10:46:53 -0500
Subject: [PATCH 01/20] docs(session): add documentation for session token
 duration and fix default (#8791)

---
 datahub-frontend/conf/application.conf                | 2 +-
 docs/authentication/README.md                         | 3 ++-
 .../authentication/guides/sso/configure-oidc-react.md | 1 +
 docs/deploy/environment-vars.md                       | 11 ++++++-----
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/datahub-frontend/conf/application.conf b/datahub-frontend/conf/application.conf
index 2a3d7f395736e..18d901d5ee7dd 100644
--- a/datahub-frontend/conf/application.conf
+++ b/datahub-frontend/conf/application.conf
@@ -200,7 +200,7 @@ auth.native.enabled = ${?AUTH_NATIVE_ENABLED}
 # auth.oidc.enabled = false # (or simply omit oidc configurations)

 # Login session expiration time
-auth.session.ttlInHours = 720
+auth.session.ttlInHours = 24
 auth.session.ttlInHours = ${?AUTH_SESSION_TTL_HOURS}

 analytics.enabled = true
diff --git a/docs/authentication/README.md b/docs/authentication/README.md
index 4034cb15cfd22..f6eda88784486 100644
--- a/docs/authentication/README.md
+++ b/docs/authentication/README.md
@@ -31,7 +31,8 @@ When a user makes a request for Data within DataHub, the request is authenticate
 and programmatic calls to DataHub APIs. There are two types of tokens that are important:

 1. **Session Tokens**: Generated for users of the DataHub web application. By default, these have a duration of 24 hours.
-These tokens are encoded and stored inside browser-side session cookies.
+These tokens are encoded and stored inside browser-side session cookies. The duration a session token remains valid is configurable via the `AUTH_SESSION_TTL_HOURS` environment variable
+on the datahub-frontend deployment.

 2. **Personal Access Tokens**: These are tokens generated via the DataHub settings panel useful for interacting
 with DataHub APIs. They can be used to automate processes like enriching documentation, ownership, tags, and more on DataHub. Learn more about Personal Access Tokens [here](personal-access-tokens.md).
diff --git a/docs/authentication/guides/sso/configure-oidc-react.md b/docs/authentication/guides/sso/configure-oidc-react.md
index b7efb94f842d6..d27792ce3967b 100644
--- a/docs/authentication/guides/sso/configure-oidc-react.md
+++ b/docs/authentication/guides/sso/configure-oidc-react.md
@@ -72,6 +72,7 @@ AUTH_OIDC_BASE_URL=your-datahub-url
 - `AUTH_OIDC_CLIENT_SECRET`: Unique client secret received from identity provider
 - `AUTH_OIDC_DISCOVERY_URI`: Location of the identity provider OIDC discovery API. Suffixed with `.well-known/openid-configuration`
 - `AUTH_OIDC_BASE_URL`: The base URL of your DataHub deployment, e.g. https://yourorgdatahub.com (prod) or http://localhost:9002 (testing)
+- `AUTH_SESSION_TTL_HOURS`: The number of hours before a user is prompted to log in again. Session tokens are stateless, so a token remains valid until this interval has passed; it cannot be invalidated sooner.
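+
+For example, setting `AUTH_SESSION_TTL_HOURS=168` keeps sessions valid for one week before users must re-authenticate (an illustrative value; pick one that fits your organization's security policy).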
Providing these configs will cause DataHub to delegate authentication to your identity
provider, requesting the "oidc email profile" scopes and parsing the "preferred_username" claim from
diff --git a/docs/deploy/environment-vars.md b/docs/deploy/environment-vars.md
index a09db7bc6b82e..0689db9b17331 100644
--- a/docs/deploy/environment-vars.md
+++ b/docs/deploy/environment-vars.md
@@ -79,8 +79,9 @@ Simply replace the dot, `.`, with an underscore, `_`, and convert to uppercase.

 ## Frontend

-| Variable | Default | Unit/Type | Components | Description |
-|------------------------------------|----------|-----------|--------------|-------------------------------------------------------------------------------------------------------------------------------------|
-| `AUTH_VERBOSE_LOGGING` | `false` | boolean | [`Frontend`] | Enable verbose authentication logging. Enabling this will leak sensisitve information in the logs. Disable when finished debugging. |
-| `AUTH_OIDC_GROUPS_CLAIM` | `groups` | string | [`Frontend`] | Claim to use as the user's group. |
-| `AUTH_OIDC_EXTRACT_GROUPS_ENABLED` | `false` | boolean | [`Frontend`] | Auto-provision the group from the user's group claim. |
+| Variable | Default | Unit/Type | Components | Description |
+|------------------------------------|----------|-----------|--------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `AUTH_VERBOSE_LOGGING` | `false` | boolean | [`Frontend`] | Enable verbose authentication logging. Enabling this will leak sensitive information in the logs. Disable when finished debugging. |
+| `AUTH_OIDC_GROUPS_CLAIM` | `groups` | string | [`Frontend`] | Claim to use as the user's group. |
+| `AUTH_OIDC_EXTRACT_GROUPS_ENABLED` | `false` | boolean | [`Frontend`] | Auto-provision the group from the user's group claim. |
+| `AUTH_SESSION_TTL_HOURS` | `24` | string | [`Frontend`] | The number of hours a user session is valid. [User session tokens are stateless and become invalid after this time](https://www.playframework.com/documentation/2.8.x/SettingsSession#Session-Timeout-/-Expiration), requiring the user to log in again. 
| \ No newline at end of file From ac025e508d060ef4f741148dbdddeaef9de4b85e Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Wed, 6 Sep 2023 12:18:29 -0400 Subject: [PATCH 02/20] fix(ingest/datahub): Support postgres; build(postgres): Modernize postgres docker setup (#8762) --- docker/datahub-gms/env/docker.postgres.env | 20 +---- .../datahub-upgrade/env/docker.postgres.env | 7 ++ ...ompose-without-neo4j.postgres.override.yml | 76 +++++++++++++++++++ docker/docker-compose-without-neo4j.yml | 2 - docker/postgres-setup/Dockerfile | 2 +- docker/postgres/docker-compose.postgres.yml | 22 ------ docker/postgres/env/docker.env | 1 + docker/postgres/init.sql | 2 +- .../source/datahub/datahub_database_reader.py | 9 ++- 9 files changed, 92 insertions(+), 49 deletions(-) create mode 100644 docker/datahub-upgrade/env/docker.postgres.env create mode 100644 docker/docker-compose-without-neo4j.postgres.override.yml delete mode 100644 docker/postgres/docker-compose.postgres.yml diff --git a/docker/datahub-gms/env/docker.postgres.env b/docker/datahub-gms/env/docker.postgres.env index 25d31afa25804..529be3323a4d9 100644 --- a/docker/datahub-gms/env/docker.postgres.env +++ b/docker/datahub-gms/env/docker.postgres.env @@ -1,25 +1,7 @@ -DATAHUB_UPGRADE_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-duhe-consumer-job-client-gms -EBEAN_DATASOURCE_USERNAME=datahub -EBEAN_DATASOURCE_PASSWORD=datahub EBEAN_DATASOURCE_HOST=postgres:5432 EBEAN_DATASOURCE_URL=jdbc:postgresql://postgres:5432/datahub EBEAN_DATASOURCE_DRIVER=org.postgresql.Driver + # Uncomment EBEAN_POSTGRES_USE_AWS_IAM_AUTH below to add support for IAM authentication for Postgres. # Password is not required when accessing Postgres using IAM auth. It can be replaced by dummy password # EBEAN_POSTGRES_USE_AWS_IAM_AUTH=true -KAFKA_BOOTSTRAP_SERVER=broker:29092 -KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 -# KAFKA_SCHEMAREGISTRY_URL=http://datahub-gms:8080/schema-registry/api/ -ELASTICSEARCH_HOST=elasticsearch -ELASTICSEARCH_PORT=9200 -ES_BULK_REFRESH_POLICY=WAIT_UNTIL -ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true -ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true -NEO4J_HOST=http://neo4j:7474 -NEO4J_URI=bolt://neo4j -NEO4J_USERNAME=neo4j -NEO4J_PASSWORD=datahub -MAE_CONSUMER_ENABLED=true -MCE_CONSUMER_ENABLED=true -JAVA_OPTS=-Xms1g -Xmx1g -ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml diff --git a/docker/datahub-upgrade/env/docker.postgres.env b/docker/datahub-upgrade/env/docker.postgres.env new file mode 100644 index 0000000000000..529be3323a4d9 --- /dev/null +++ b/docker/datahub-upgrade/env/docker.postgres.env @@ -0,0 +1,7 @@ +EBEAN_DATASOURCE_HOST=postgres:5432 +EBEAN_DATASOURCE_URL=jdbc:postgresql://postgres:5432/datahub +EBEAN_DATASOURCE_DRIVER=org.postgresql.Driver + +# Uncomment EBEAN_POSTGRES_USE_AWS_IAM_AUTH below to add support for IAM authentication for Postgres. +# Password is not required when accessing Postgres using IAM auth. It can be replaced by dummy password +# EBEAN_POSTGRES_USE_AWS_IAM_AUTH=true diff --git a/docker/docker-compose-without-neo4j.postgres.override.yml b/docker/docker-compose-without-neo4j.postgres.override.yml new file mode 100644 index 0000000000000..e4c754b30afd7 --- /dev/null +++ b/docker/docker-compose-without-neo4j.postgres.override.yml @@ -0,0 +1,76 @@ +# Override to use PostgreSQL as a backing store for datahub-gms. 
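+#
+# Example invocation (a sketch, assuming the files are used together from the
+# docker/ directory with Compose's standard multi-file override semantics):
+#   docker compose -f docker-compose-without-neo4j.yml \
+#     -f docker-compose-without-neo4j.postgres.override.yml up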
+--- +version: '3.9' +services: + datahub-gms: + env_file: + - datahub-gms/env/docker-without-neo4j.env + - datahub-gms/env/docker.postgres.env + environment: + - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart} + - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} + depends_on: + datahub-upgrade: + condition: service_completed_successfully + volumes: + - ${HOME}/.datahub/plugins:/etc/datahub/plugins + + datahub-upgrade: + container_name: datahub-upgrade + hostname: datahub-upgrade + image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head} + command: + - -u + - SystemUpdate + build: + context: ../ + dockerfile: docker/datahub-upgrade/Dockerfile + env_file: + - datahub-upgrade/env/docker-without-neo4j.env + - datahub-upgrade/env/docker.postgres.env + depends_on: + postgres-setup: + condition: service_completed_successfully + elasticsearch-setup: + condition: service_completed_successfully + kafka-setup: + condition: service_completed_successfully + + postgres-setup: + container_name: postgres-setup + hostname: postgres-setup + image: ${DATAHUB_POSTGRES_SETUP_IMAGE:-acryldata/datahub-postgres-setup}:${DATAHUB_VERSION:-head} + build: + context: ../ + dockerfile: docker/postgres-setup/Dockerfile + env_file: postgres-setup/env/docker.env + depends_on: + postgres: + condition: service_healthy + labels: + datahub_setup_job: true + + postgres: + container_name: postgres + hostname: postgres + image: postgres:12.3 + env_file: postgres/env/docker.env + ports: + - '5432:5432' + restart: on-failure + healthcheck: + test: [ "CMD-SHELL", "pg_isready" ] + start_period: 20s + interval: 2s + timeout: 10s + retries: 5 + volumes: + - ./init.sql:/docker-entrypoint-initdb.d/init.sql + - postgresdata:/var/lib/postgresql/data + + kafka-setup: + environment: + - DATAHUB_PRECREATE_TOPICS=${DATAHUB_PRECREATE_TOPICS:-false} + +volumes: + postgresdata: diff --git a/docker/docker-compose-without-neo4j.yml b/docker/docker-compose-without-neo4j.yml index a755eda21cbf5..0b2e4f76b8fa9 100644 --- a/docker/docker-compose-without-neo4j.yml +++ b/docker/docker-compose-without-neo4j.yml @@ -66,8 +66,6 @@ services: dockerfile: docker/datahub-upgrade/Dockerfile env_file: datahub-upgrade/env/docker-without-neo4j.env depends_on: - mysql-setup: - condition: service_completed_successfully elasticsearch-setup: condition: service_completed_successfully kafka-setup: diff --git a/docker/postgres-setup/Dockerfile b/docker/postgres-setup/Dockerfile index 5701427d2d48a..313615ac3465b 100644 --- a/docker/postgres-setup/Dockerfile +++ b/docker/postgres-setup/Dockerfile @@ -10,10 +10,10 @@ WORKDIR /go/src/github.com/jwilder/dockerize RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION FROM alpine:3 +COPY --from=binary /go/bin/dockerize /usr/local/bin # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk add --no-cache postgresql-client sqlite -COPY --from=binary /go/bin/dockerize /usr/local/bin COPY docker/postgres-setup/init.sql /init.sql COPY docker/postgres-setup/init.sh /init.sh diff --git a/docker/postgres/docker-compose.postgres.yml b/docker/postgres/docker-compose.postgres.yml deleted file mode 100644 index 5eaf9478932b5..0000000000000 --- a/docker/postgres/docker-compose.postgres.yml +++ /dev/null @@ -1,22 +0,0 @@ -# Override to use PostgreSQL as a backing store for datahub-gms. 
---- -version: '3.8' -services: - postgres: - container_name: postgres - hostname: postgres - image: postgres:12.3 - env_file: env/docker.env - ports: - - '5432:5432' - volumes: - - ./init.sql:/docker-entrypoint-initdb.d/init.sql - - datahub-gms: - env_file: ../datahub-gms/env/dev.postgres.env - depends_on: - - postgres - -networks: - default: - name: datahub_network diff --git a/docker/postgres/env/docker.env b/docker/postgres/env/docker.env index f84a2b5635190..aced4c441222b 100644 --- a/docker/postgres/env/docker.env +++ b/docker/postgres/env/docker.env @@ -1,2 +1,3 @@ POSTGRES_USER: datahub POSTGRES_PASSWORD: datahub +PGUSER: datahub diff --git a/docker/postgres/init.sql b/docker/postgres/init.sql index 4da8adaf8a6da..cf477c135422e 100644 --- a/docker/postgres/init.sql +++ b/docker/postgres/init.sql @@ -11,7 +11,7 @@ create table metadata_aspect_v2 ( constraint pk_metadata_aspect_v2 primary key (urn,aspect,version) ); -create index timeIndex ON metadata_aspect_v2 (createdon); +create index if not exists timeIndex ON metadata_aspect_v2 (createdon); insert into metadata_aspect_v2 (urn, aspect, version, metadata, createdon, createdby) values( 'urn:li:corpuser:datahub', diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py index 39702ba3ce347..a5aadbd6e246b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py @@ -38,14 +38,15 @@ def __init__( def query(self) -> str: # May repeat rows for the same date # Offset is generally 0, unless we repeat the same createdon twice + + # Ensures stable order, chronological per (urn, aspect) + # Version 0 last, only when createdon is the same. Otherwise relies on createdon order return f""" SELECT urn, aspect, metadata, systemmetadata, createdon - FROM `{self.config.database_table_name}` + FROM {self.engine.dialect.identifier_preparer.quote(self.config.database_table_name)} WHERE createdon >= %(since_createdon)s {"" if self.config.include_all_versions else "AND version = 0"} - ORDER BY createdon, urn, aspect, # Ensure stable order, chronological per (urn, aspect) - CASE WHEN version = 0 THEN 1 ELSE 0 END, version - # Version 0 last, only when createdon is the same. Otherwise relies on createdon order + ORDER BY createdon, urn, aspect, CASE WHEN version = 0 THEN 1 ELSE 0 END, version LIMIT %(limit)s OFFSET %(offset)s """ From f649d6fc17cdd0f3d79965fd61ce92f8e6d8ed2a Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Wed, 6 Sep 2023 23:08:35 +0530 Subject: [PATCH 03/20] feat(airflow-plugin): add package type information (#8795) --- metadata-ingestion-modules/airflow-plugin/setup.py | 3 +++ .../airflow-plugin/src/datahub_airflow_plugin/py.typed | 0 2 files changed, 3 insertions(+) create mode 100644 metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/py.typed diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index c5bdc7ea329cd..18e605ae76ebd 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -121,6 +121,9 @@ def get_long_description(): # Package info. 
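     # (The py.typed marker shipped via package_data below opts this package into
     # PEP 561, so type checkers can use the inline type annotations it contains.)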
zip_safe=False, python_requires=">=3.7", + package_data={ + "datahub_airflow_plugin": ["py.typed"], + }, package_dir={"": "src"}, packages=setuptools.find_namespace_packages(where="./src"), entry_points=entry_points, diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/py.typed b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/py.typed new file mode 100644 index 0000000000000..e69de29bb2d1d From 94fa5147001fa52f46c7e7ef74afa48dbda06561 Mon Sep 17 00:00:00 2001 From: John Joyce Date: Wed, 6 Sep 2023 10:51:03 -0700 Subject: [PATCH 04/20] feat(systemMetadata): Adding a lastRunId field system metadata (#8672) Co-authored-by: Aseem Bansal Co-authored-by: Indy Prentice --- .../types/chart/mappers/ChartMapper.java | 2 +- .../types/common/mappers/util/RunInfo.java | 16 ++++++ .../mappers/util/SystemMetadataUtils.java | 46 ++++++++++++++--- .../container/mappers/ContainerMapper.java | 2 +- .../dashboard/mappers/DashboardMapper.java | 2 +- .../dataflow/mappers/DataFlowMapper.java | 2 +- .../types/datajob/mappers/DataJobMapper.java | 2 +- .../mappers/DataPlatformMapper.java | 2 +- .../types/dataset/mappers/DatasetMapper.java | 2 +- .../mlmodel/mappers/MLFeatureMapper.java | 2 +- .../mlmodel/mappers/MLFeatureTableMapper.java | 2 +- .../mlmodel/mappers/MLModelGroupMapper.java | 2 +- .../types/mlmodel/mappers/MLModelMapper.java | 2 +- .../mlmodel/mappers/MLPrimaryKeyMapper.java | 2 +- .../utils/SystemMetadataUtilsTest.java | 49 +++++++++++++++++-- .../resources/MetadataChangeProposal.avsc | 7 ++- .../metadata/entity/EntityServiceImpl.java | 7 +++ .../metadata/AspectGenerationUtils.java | 6 +++ .../metadata/entity/EntityServiceTest.java | 2 +- .../com/linkedin/mxe/SystemMetadata.pdl | 7 ++- .../com.linkedin.entity.aspects.snapshot.json | 8 ++- ...com.linkedin.entity.entities.snapshot.json | 8 ++- ...m.linkedin.entity.entitiesV2.snapshot.json | 8 ++- ...n.entity.entitiesVersionedV2.snapshot.json | 8 ++- 24 files changed, 165 insertions(+), 31 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/util/RunInfo.java diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/mappers/ChartMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/mappers/ChartMapper.java index 9e03bf19889d1..657c9b688aed2 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/mappers/ChartMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/mappers/ChartMapper.java @@ -68,7 +68,7 @@ public Chart apply(@Nonnull final EntityResponse entityResponse) { result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.CHART); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); - Long lastIngested = SystemMetadataUtils.getLastIngested(aspectMap); + Long lastIngested = SystemMetadataUtils.getLastIngestedTime(aspectMap); result.setLastIngested(lastIngested); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/util/RunInfo.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/util/RunInfo.java new file mode 100644 index 0000000000000..7d1b374e1f9b6 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/util/RunInfo.java @@ -0,0 +1,16 @@ +package 
com.linkedin.datahub.graphql.types.common.mappers.util; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.Getter; +import lombok.Setter; + + +@Data +@Setter +@Getter +@AllArgsConstructor +public class RunInfo { + private final String id; + private final Long time; +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/util/SystemMetadataUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/util/SystemMetadataUtils.java index 731f4e85e0663..d08300d648c32 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/util/SystemMetadataUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/util/SystemMetadataUtils.java @@ -3,7 +3,10 @@ import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.mxe.SystemMetadata; +import java.util.ArrayList; +import java.util.List; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import static com.linkedin.metadata.Constants.DEFAULT_RUN_ID; @@ -12,19 +15,48 @@ public class SystemMetadataUtils { private SystemMetadataUtils() { } - public static Long getLastIngested(@Nonnull EnvelopedAspectMap aspectMap) { - Long lastIngested = null; + @Nullable + public static Long getLastIngestedTime(@Nonnull EnvelopedAspectMap aspectMap) { + RunInfo lastIngestionRun = getLastIngestionRun(aspectMap); + return lastIngestionRun != null ? lastIngestionRun.getTime() : null; + } + + @Nullable + public static String getLastIngestedRunId(@Nonnull EnvelopedAspectMap aspectMap) { + RunInfo lastIngestionRun = getLastIngestionRun(aspectMap); + return lastIngestionRun != null ? lastIngestionRun.getId() : null; + } + + /** + * Returns a sorted list of all of the most recent ingestion runs based on the most recent aspects present for the entity. + */ + @Nonnull + public static List getLastIngestionRuns(@Nonnull EnvelopedAspectMap aspectMap) { + final List runs = new ArrayList<>(); for (String aspect : aspectMap.keySet()) { if (aspectMap.get(aspect).hasSystemMetadata()) { SystemMetadata systemMetadata = aspectMap.get(aspect).getSystemMetadata(); - if (systemMetadata.hasRunId() && !systemMetadata.getRunId().equals(DEFAULT_RUN_ID) && systemMetadata.hasLastObserved()) { + if (systemMetadata.hasLastRunId() && !systemMetadata.getLastRunId().equals(DEFAULT_RUN_ID) && systemMetadata.hasLastObserved()) { Long lastObserved = systemMetadata.getLastObserved(); - if (lastIngested == null || lastObserved > lastIngested) { - lastIngested = lastObserved; - } + String runId = systemMetadata.getLastRunId(); + RunInfo run = new RunInfo(runId, lastObserved); + runs.add(run); + } else if (systemMetadata.hasRunId() && !systemMetadata.getRunId().equals(DEFAULT_RUN_ID) && systemMetadata.hasLastObserved()) { + // Handle the legacy case: Check original run ids. + Long lastObserved = systemMetadata.getLastObserved(); + String runId = systemMetadata.getRunId(); + RunInfo run = new RunInfo(runId, lastObserved); + runs.add(run); } } } - return lastIngested; + runs.sort((a, b) -> Long.compare(b.getTime(), a.getTime())); + return runs; + } + + @Nullable + private static RunInfo getLastIngestionRun(@Nonnull EnvelopedAspectMap aspectMap) { + List runs = getLastIngestionRuns(aspectMap); + return !runs.isEmpty() ? runs.get(0) : null; // Just take the first, to get the most recent run. 
} } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/mappers/ContainerMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/mappers/ContainerMapper.java index ec559a569920d..aeaa8f4f85c14 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/mappers/ContainerMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/container/mappers/ContainerMapper.java @@ -42,7 +42,7 @@ public static Container map(final EntityResponse entityResponse) { final Container result = new Container(); final Urn entityUrn = entityResponse.getUrn(); final EnvelopedAspectMap aspects = entityResponse.getAspects(); - Long lastIngested = SystemMetadataUtils.getLastIngested(aspects); + Long lastIngested = SystemMetadataUtils.getLastIngestedTime(aspects); result.setLastIngested(lastIngested); result.setUrn(entityUrn.toString()); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/mappers/DashboardMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/mappers/DashboardMapper.java index 38e2cacbde668..32e4341ece4aa 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/mappers/DashboardMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/mappers/DashboardMapper.java @@ -67,7 +67,7 @@ public Dashboard apply(@Nonnull final EntityResponse entityResponse) { result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.DASHBOARD); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); - Long lastIngested = SystemMetadataUtils.getLastIngested(aspectMap); + Long lastIngested = SystemMetadataUtils.getLastIngestedTime(aspectMap); result.setLastIngested(lastIngested); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/mappers/DataFlowMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/mappers/DataFlowMapper.java index 98debe08cf36b..719fa9f0b2bf0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/mappers/DataFlowMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/mappers/DataFlowMapper.java @@ -57,7 +57,7 @@ public DataFlow apply(@Nonnull final EntityResponse entityResponse) { Urn entityUrn = entityResponse.getUrn(); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); - Long lastIngested = SystemMetadataUtils.getLastIngested(aspectMap); + Long lastIngested = SystemMetadataUtils.getLastIngestedTime(aspectMap); result.setLastIngested(lastIngested); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java index 208a85acfe42e..4845fc1876348 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/mappers/DataJobMapper.java @@ -60,7 +60,7 @@ public DataJob apply(@Nonnull final EntityResponse entityResponse) { result.setType(EntityType.DATA_JOB); EnvelopedAspectMap aspectMap = 
entityResponse.getAspects(); - Long lastIngested = SystemMetadataUtils.getLastIngested(aspectMap); + Long lastIngested = SystemMetadataUtils.getLastIngestedTime(aspectMap); result.setLastIngested(lastIngested); entityResponse.getAspects().forEach((name, aspect) -> { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatform/mappers/DataPlatformMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatform/mappers/DataPlatformMapper.java index 0f4b90cd6b3b4..8df44e8f6e9e9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatform/mappers/DataPlatformMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataplatform/mappers/DataPlatformMapper.java @@ -35,7 +35,7 @@ public DataPlatform apply(@Nonnull final EntityResponse entityResponse) { result.setName(dataPlatformKey.getPlatformName()); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); - Long lastIngested = SystemMetadataUtils.getLastIngested(aspectMap); + Long lastIngested = SystemMetadataUtils.getLastIngestedTime(aspectMap); result.setLastIngested(lastIngested); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java index f0899f8fbc0cb..40e0432f9ed39 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java @@ -75,7 +75,7 @@ public Dataset apply(@Nonnull final EntityResponse entityResponse) { result.setType(EntityType.DATASET); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); - Long lastIngested = SystemMetadataUtils.getLastIngested(aspectMap); + Long lastIngested = SystemMetadataUtils.getLastIngestedTime(aspectMap); result.setLastIngested(lastIngested); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureMapper.java index c924384756ac8..010ae477251f3 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureMapper.java @@ -60,7 +60,7 @@ public MLFeature apply(@Nonnull final EntityResponse entityResponse) { result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.MLFEATURE); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); - Long lastIngested = SystemMetadataUtils.getLastIngested(aspectMap); + Long lastIngested = SystemMetadataUtils.getLastIngestedTime(aspectMap); result.setLastIngested(lastIngested); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTableMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTableMapper.java index f0e7cffe5578d..3ba9a76c4bdde 100644 --- 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTableMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLFeatureTableMapper.java @@ -59,7 +59,7 @@ public MLFeatureTable apply(@Nonnull final EntityResponse entityResponse) { result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.MLFEATURE_TABLE); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); - Long lastIngested = SystemMetadataUtils.getLastIngested(aspectMap); + Long lastIngested = SystemMetadataUtils.getLastIngestedTime(aspectMap); result.setLastIngested(lastIngested); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupMapper.java index 52db5468e0091..311ee121bcaf9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelGroupMapper.java @@ -57,7 +57,7 @@ public MLModelGroup apply(@Nonnull final EntityResponse entityResponse) { result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.MLMODEL_GROUP); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); - Long lastIngested = SystemMetadataUtils.getLastIngested(aspectMap); + Long lastIngested = SystemMetadataUtils.getLastIngestedTime(aspectMap); result.setLastIngested(lastIngested); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java index 414ba5d196d6b..0c2eeabe5701d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java @@ -71,7 +71,7 @@ public MLModel apply(@Nonnull final EntityResponse entityResponse) { result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.MLMODEL); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); - Long lastIngested = SystemMetadataUtils.getLastIngested(aspectMap); + Long lastIngested = SystemMetadataUtils.getLastIngestedTime(aspectMap); result.setLastIngested(lastIngested); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLPrimaryKeyMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLPrimaryKeyMapper.java index 533c0f60a930a..0bd5db4d884ae 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLPrimaryKeyMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLPrimaryKeyMapper.java @@ -56,7 +56,7 @@ public MLPrimaryKey apply(@Nonnull final EntityResponse entityResponse) { result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.MLPRIMARY_KEY); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); - Long lastIngested = SystemMetadataUtils.getLastIngested(aspectMap); + Long 
lastIngested = SystemMetadataUtils.getLastIngestedTime(aspectMap); result.setLastIngested(lastIngested); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/SystemMetadataUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/SystemMetadataUtilsTest.java index dc9b0d97812ae..adbc6808b5ab9 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/SystemMetadataUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/SystemMetadataUtilsTest.java @@ -1,9 +1,11 @@ package com.linkedin.datahub.graphql.utils; +import com.linkedin.datahub.graphql.types.common.mappers.util.RunInfo; import com.linkedin.datahub.graphql.types.common.mappers.util.SystemMetadataUtils; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.mxe.SystemMetadata; +import java.util.List; import org.testng.annotations.Test; import static org.testng.Assert.*; @@ -17,7 +19,7 @@ public class SystemMetadataUtilsTest { private final Long distantLastObserved = 1657226036292L; @Test - public void testGetLastIngested() { + public void testGetLastIngestedTime() { EnvelopedAspectMap aspectMap = new EnvelopedAspectMap(); aspectMap.put("default-run-id", new EnvelopedAspect().setSystemMetadata( new SystemMetadata().setRunId(DEFAULT_RUN_ID).setLastObserved(recentLastObserved) @@ -29,12 +31,49 @@ public void testGetLastIngested() { new SystemMetadata().setRunId("real-id-2").setLastObserved(distantLastObserved) )); - Long lastObserved = SystemMetadataUtils.getLastIngested(aspectMap); + Long lastObserved = SystemMetadataUtils.getLastIngestedTime(aspectMap); assertEquals(lastObserved, mediumLastObserved); } @Test - public void testGetLastIngestedAllDefaultRunIds() { + public void testGetLastIngestedRunId() { + EnvelopedAspectMap aspectMap = new EnvelopedAspectMap(); + aspectMap.put("default-run-id", new EnvelopedAspect().setSystemMetadata( + new SystemMetadata().setRunId(DEFAULT_RUN_ID).setLastObserved(recentLastObserved) + )); + aspectMap.put("real-run-id", new EnvelopedAspect().setSystemMetadata( + new SystemMetadata().setRunId("real-id-1").setLastObserved(mediumLastObserved) + )); + aspectMap.put("real-run-id2", new EnvelopedAspect().setSystemMetadata( + new SystemMetadata().setRunId("real-id-2").setLastObserved(distantLastObserved) + )); + + String lastRunId = SystemMetadataUtils.getLastIngestedRunId(aspectMap); + assertEquals(lastRunId, "real-id-1"); + } + + @Test + public void testGetLastIngestedRuns() { + EnvelopedAspectMap aspectMap = new EnvelopedAspectMap(); + aspectMap.put("default-run-id", new EnvelopedAspect().setSystemMetadata( + new SystemMetadata().setRunId(DEFAULT_RUN_ID).setLastObserved(recentLastObserved) + )); + aspectMap.put("real-run-id", new EnvelopedAspect().setSystemMetadata( + new SystemMetadata().setRunId("real-id-1").setLastObserved(mediumLastObserved) + )); + aspectMap.put("real-run-id2", new EnvelopedAspect().setSystemMetadata( + new SystemMetadata().setRunId("real-id-2").setLastObserved(distantLastObserved) + )); + + List runs = SystemMetadataUtils.getLastIngestionRuns(aspectMap); + + assertEquals(runs.size(), 2); + assertEquals(runs.get(0), new RunInfo("real-id-1", mediumLastObserved)); + assertEquals(runs.get(1), new RunInfo("real-id-2", distantLastObserved)); + } + + @Test + public void testGetLastIngestedTimeAllDefaultRunIds() { EnvelopedAspectMap aspectMap = new 
EnvelopedAspectMap(); aspectMap.put("default-run-id", new EnvelopedAspect().setSystemMetadata( new SystemMetadata().setRunId(DEFAULT_RUN_ID).setLastObserved(recentLastObserved) @@ -46,7 +85,7 @@ public void testGetLastIngestedAllDefaultRunIds() { new SystemMetadata().setRunId(DEFAULT_RUN_ID).setLastObserved(distantLastObserved) )); - Long lastObserved = SystemMetadataUtils.getLastIngested(aspectMap); + Long lastObserved = SystemMetadataUtils.getLastIngestedTime(aspectMap); assertNull(lastObserved, null); } @@ -54,7 +93,7 @@ public void testGetLastIngestedAllDefaultRunIds() { public void testGetLastIngestedNoAspects() { EnvelopedAspectMap aspectMap = new EnvelopedAspectMap(); - Long lastObserved = SystemMetadataUtils.getLastIngested(aspectMap); + Long lastObserved = SystemMetadataUtils.getLastIngestedTime(aspectMap); assertNull(lastObserved, null); } } diff --git a/metadata-integration/java/datahub-client/src/main/resources/MetadataChangeProposal.avsc b/metadata-integration/java/datahub-client/src/main/resources/MetadataChangeProposal.avsc index 8a4e6028ea268..6a723090fda07 100644 --- a/metadata-integration/java/datahub-client/src/main/resources/MetadataChangeProposal.avsc +++ b/metadata-integration/java/datahub-client/src/main/resources/MetadataChangeProposal.avsc @@ -136,7 +136,12 @@ }, { "name" : "runId", "type" : [ "string", "null" ], - "doc" : "The run id that produced the metadata. Populated in case of batch-ingestion.", + "doc" : "The original run id that produced the metadata. Populated in case of batch-ingestion.", + "default" : "no-run-id-provided" + }, { + "name" : "lastRunId", + "type" : [ "string", "null" ], + "doc" : "The last run id that produced the metadata. Populated in case of batch-ingestion.", "default" : "no-run-id-provided" }, { "name" : "registryName", diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 03b81cfc703c4..2cacdc7d38fc6 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -1,6 +1,8 @@ package com.linkedin.metadata.entity; import com.codahale.metrics.Timer; +import com.linkedin.data.template.GetMode; +import com.linkedin.data.template.SetMode; import com.linkedin.metadata.config.PreProcessHooks; import com.datahub.util.RecordUtils; import com.datahub.util.exception.ModelConversionException; @@ -1805,6 +1807,10 @@ private UpdateAspectResult ingestAspectToLocalDB( @Nullable final EntityAspect latest, @Nonnull final Long nextVersion) { + // Set the "last run id" to be the run id provided with the new system metadata. This will be stored in index + // for all aspects that have a run id, regardless of whether they change. + providedSystemMetadata.setLastRunId(providedSystemMetadata.getRunId(GetMode.NULL), SetMode.IGNORE_NULL); + // 2. Compare the latest existing and new. final RecordTemplate oldValue = latest == null ? 
null : EntityUtils.toAspectRecord(urn, aspectName, latest.getMetadata(), getEntityRegistry()); @@ -1814,6 +1820,7 @@ private UpdateAspectResult ingestAspectToLocalDB( if (oldValue != null && DataTemplateUtil.areEqual(oldValue, newValue)) { SystemMetadata latestSystemMetadata = EntityUtils.parseSystemMetadata(latest.getSystemMetadata()); latestSystemMetadata.setLastObserved(providedSystemMetadata.getLastObserved()); + latestSystemMetadata.setLastRunId(providedSystemMetadata.getLastRunId(GetMode.NULL), SetMode.IGNORE_NULL); latest.setSystemMetadata(RecordUtils.toJsonString(latestSystemMetadata)); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/AspectGenerationUtils.java b/metadata-io/src/test/java/com/linkedin/metadata/AspectGenerationUtils.java index 58b5d9ab2d719..e9183738c786c 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/AspectGenerationUtils.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/AspectGenerationUtils.java @@ -33,9 +33,15 @@ public static SystemMetadata createSystemMetadata() { @Nonnull public static SystemMetadata createSystemMetadata(long lastObserved, @Nonnull String runId) { + return createSystemMetadata(lastObserved, runId, runId); + } + + @Nonnull + public static SystemMetadata createSystemMetadata(long lastObserved, @Nonnull String runId, @Nonnull String lastRunId) { SystemMetadata metadata = new SystemMetadata(); metadata.setLastObserved(lastObserved); metadata.setRunId(runId); + metadata.setLastRunId(lastRunId); return metadata; } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index c0d2a3783c0a7..f205adc128ed2 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -1092,7 +1092,7 @@ public void testIngestSameAspect() throws AssertionError { SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(1625792689, "run-123"); SystemMetadata metadata2 = AspectGenerationUtils.createSystemMetadata(1635792689, "run-456"); - SystemMetadata metadata3 = AspectGenerationUtils.createSystemMetadata(1635792689, "run-123"); + SystemMetadata metadata3 = AspectGenerationUtils.createSystemMetadata(1635792689, "run-123", "run-456"); List items = List.of( UpsertBatchItem.builder() diff --git a/metadata-models/src/main/pegasus/com/linkedin/mxe/SystemMetadata.pdl b/metadata-models/src/main/pegasus/com/linkedin/mxe/SystemMetadata.pdl index e0f355229c912..101557fca7949 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/mxe/SystemMetadata.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/mxe/SystemMetadata.pdl @@ -10,10 +10,15 @@ record SystemMetadata { lastObserved: optional long = 0 /** - * The run id that produced the metadata. Populated in case of batch-ingestion. + * The original run id that produced the metadata. Populated in case of batch-ingestion. */ runId: optional string = "no-run-id-provided" + /** + * The last run id that produced the metadata. Populated in case of batch-ingestion. + */ + lastRunId: optional string = "no-run-id-provided" + /** * The ingestion pipeline id that produced the metadata. Populated in case of batch ingestion. 
*/ diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index ee6318026e27d..0403fa2ceea6f 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -69,7 +69,13 @@ }, { "name" : "runId", "type" : "string", - "doc" : "The run id that produced the metadata. Populated in case of batch-ingestion.", + "doc" : "The original run id that produced the metadata. Populated in case of batch-ingestion.", + "default" : "no-run-id-provided", + "optional" : true + }, { + "name" : "lastRunId", + "type" : "string", + "doc" : "The last run id that produced the metadata. Populated in case of batch-ingestion.", "default" : "no-run-id-provided", "optional" : true }, { diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index d63a938bbce9d..d79a4a1919af9 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -6249,7 +6249,13 @@ }, { "name" : "runId", "type" : "string", - "doc" : "The run id that produced the metadata. Populated in case of batch-ingestion.", + "doc" : "The original run id that produced the metadata. Populated in case of batch-ingestion.", + "default" : "no-run-id-provided", + "optional" : true + }, { + "name" : "lastRunId", + "type" : "string", + "doc" : "The last run id that produced the metadata. Populated in case of batch-ingestion.", "default" : "no-run-id-provided", "optional" : true }, { diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesV2.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesV2.snapshot.json index 0b31bf9683d0c..c7618e5d3c5a1 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesV2.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesV2.snapshot.json @@ -114,7 +114,13 @@ }, { "name" : "runId", "type" : "string", - "doc" : "The run id that produced the metadata. Populated in case of batch-ingestion.", + "doc" : "The original run id that produced the metadata. Populated in case of batch-ingestion.", + "default" : "no-run-id-provided", + "optional" : true + }, { + "name" : "lastRunId", + "type" : "string", + "doc" : "The last run id that produced the metadata. Populated in case of batch-ingestion.", "default" : "no-run-id-provided", "optional" : true }, { diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesVersionedV2.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesVersionedV2.snapshot.json index 24a4ec2cc6802..45e542883b723 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesVersionedV2.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesVersionedV2.snapshot.json @@ -123,7 +123,13 @@ }, { "name" : "runId", "type" : "string", - "doc" : "The run id that produced the metadata. Populated in case of batch-ingestion.", + "doc" : "The original run id that produced the metadata. 
Populated in case of batch-ingestion.", + "default" : "no-run-id-provided", + "optional" : true + }, { + "name" : "lastRunId", + "type" : "string", + "doc" : "The last run id that produced the metadata. Populated in case of batch-ingestion.", "default" : "no-run-id-provided", "optional" : true }, { From 25148f4a6590e74a35deefe4c329590e9bed94c4 Mon Sep 17 00:00:00 2001 From: dominik s Date: Wed, 6 Sep 2023 21:12:14 +0200 Subject: [PATCH 05/20] refactor(ingest): Add support for group-owners in dataflow entities (#8154) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Dominik Schüssele Co-authored-by: Andrew Sikowitz --- .../src/datahub/api/entities/datajob/dataflow.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py b/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py index fdb9a30083ca4..8a04768bc0a72 100644 --- a/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py +++ b/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py @@ -65,6 +65,7 @@ class DataFlow: url: Optional[str] = None tags: Set[str] = field(default_factory=set) owners: Set[str] = field(default_factory=set) + group_owners: Set[str] = field(default_factory=set) platform_instance: Optional[str] = None env: Optional[str] = None @@ -92,17 +93,20 @@ def __post_init__(self): ) def generate_ownership_aspect(self): + owners = set([builder.make_user_urn(owner) for owner in self.owners]) | set( + [builder.make_group_urn(owner) for owner in self.group_owners] + ) ownership = OwnershipClass( owners=[ OwnerClass( - owner=builder.make_user_urn(owner), + owner=urn, type=OwnershipTypeClass.DEVELOPER, source=OwnershipSourceClass( type=OwnershipSourceTypeClass.SERVICE, # url=dag.filepath, ), ) - for owner in (self.owners or []) + for urn in (owners or []) ], lastModified=AuditStampClass( time=0, actor=builder.make_user_urn(self.orchestrator) From 8bf28bfa9230994fa7cc0aafa57a00560147b201 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Thu, 7 Sep 2023 01:17:36 +0530 Subject: [PATCH 06/20] fix(ingest/tableau): fix tableau native CLL for snowflake, add type annotations (#8779) Co-authored-by: Harshal Sheth --- .../source/snowflake/snowflake_config.py | 10 ++ .../source/snowflake/snowflake_lineage_v2.py | 29 +++- .../source/snowflake/snowflake_usage_v2.py | 8 +- .../source/snowflake/snowflake_v2.py | 16 +- .../src/datahub/ingestion/source/tableau.py | 159 ++++++++++-------- 5 files changed, 131 insertions(+), 91 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py index af99faf6e6396..0bc8bb17934f7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py @@ -23,6 +23,7 @@ SnowflakeConfig, ) from datahub.ingestion.source_config.usage.snowflake_usage import SnowflakeUsageConfig +from datahub.utilities.global_warning_util import add_global_warning logger = logging.Logger(__name__) @@ -156,6 +157,15 @@ class SnowflakeV2Config( description="Format user urns as an email, if the snowflake user's email is set. 
If `email_domain` is provided, generates email addresses for snowflake users with unset emails, based on their username.", ) + @validator("convert_urns_to_lowercase") + def validate_convert_urns_to_lowercase(cls, v): + if not v: + add_global_warning( + "Please use `convert_urns_to_lowercase: True`, otherwise lineage to other sources may not work correctly." + ) + + return v + @validator("include_column_lineage") def validate_include_column_lineage(cls, v, values): if not values.get("include_table_lineage") and v: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py index cee3a2926520f..9a993f5774032 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py @@ -404,7 +404,9 @@ def _populate_external_lineage_map(self, discovered_tables: List[str]) -> None: # Handles the case for explicitly created external tables. # NOTE: Snowflake does not log this information to the access_history table. - def _populate_external_lineage_from_show_query(self, discovered_tables): + def _populate_external_lineage_from_show_query( + self, discovered_tables: List[str] + ) -> None: external_tables_query: str = SnowflakeQuery.show_external_tables() try: for db_row in self.query(external_tables_query): @@ -455,7 +457,9 @@ def _populate_external_lineage_from_copy_history( ) self.report_status(EXTERNAL_LINEAGE, False) - def _process_external_lineage_result_row(self, db_row, discovered_tables): + def _process_external_lineage_result_row( + self, db_row: dict, discovered_tables: List[str] + ) -> None: # key is the down-stream table name key: str = self.get_dataset_identifier_from_qualified_name( db_row["DOWNSTREAM_TABLE_NAME"] @@ -475,7 +479,7 @@ def _process_external_lineage_result_row(self, db_row, discovered_tables): f"ExternalLineage[Table(Down)={key}]:External(Up)={self._external_lineage_map[key]} via access_history" ) - def _fetch_upstream_lineages_for_tables(self): + def _fetch_upstream_lineages_for_tables(self) -> Iterable[Dict]: query: str = SnowflakeQuery.table_to_table_lineage_history_v2( start_time_millis=int(self.start_time.timestamp() * 1000), end_time_millis=int(self.end_time.timestamp() * 1000), @@ -498,7 +502,9 @@ def _fetch_upstream_lineages_for_tables(self): ) self.report_status(TABLE_LINEAGE, False) - def map_query_result_upstreams(self, upstream_tables): + def map_query_result_upstreams( + self, upstream_tables: Optional[List[dict]] + ) -> List[UpstreamClass]: if not upstream_tables: return [] upstreams: List[UpstreamClass] = [] @@ -510,7 +516,9 @@ def map_query_result_upstreams(self, upstream_tables): logger.debug(e, exc_info=e) return upstreams - def _process_add_single_upstream(self, upstreams, upstream_table): + def _process_add_single_upstream( + self, upstreams: List[UpstreamClass], upstream_table: dict + ) -> None: upstream_name = self.get_dataset_identifier_from_qualified_name( upstream_table["upstream_object_name"] ) @@ -524,7 +532,9 @@ def _process_add_single_upstream(self, upstreams, upstream_table): ) ) - def map_query_result_fine_upstreams(self, dataset_urn, column_wise_upstreams): + def map_query_result_fine_upstreams( + self, dataset_urn: str, column_wise_upstreams: Optional[List[dict]] + ) -> List[FineGrainedLineage]: if not column_wise_upstreams: return [] fine_upstreams: List[FineGrainedLineage] = [] @@ -539,8 +549,11 @@ def 
map_query_result_fine_upstreams(self, dataset_urn, column_wise_upstreams): return fine_upstreams def _process_add_single_column_upstream( - self, dataset_urn, fine_upstreams, column_with_upstreams - ): + self, + dataset_urn: str, + fine_upstreams: List[FineGrainedLineage], + column_with_upstreams: Dict, + ) -> None: column_name = column_with_upstreams["column_name"] upstream_jobs = column_with_upstreams["upstreams"] if column_name and upstream_jobs: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py index a64921ea01759..d041d219c4bdd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py @@ -245,7 +245,9 @@ def _get_workunits_internal( yield from self.build_usage_statistics_for_dataset(dataset_identifier, row) - def build_usage_statistics_for_dataset(self, dataset_identifier, row): + def build_usage_statistics_for_dataset( + self, dataset_identifier: str, row: dict + ) -> Iterable[MetadataWorkUnit]: try: stats = DatasetUsageStatistics( timestampMillis=int(row["BUCKET_START_TIME"].timestamp() * 1000), @@ -357,7 +359,7 @@ def _make_operations_query(self) -> str: end_time = int(self.end_time.timestamp() * 1000) return SnowflakeQuery.operational_data_for_time_window(start_time, end_time) - def _check_usage_date_ranges(self) -> Any: + def _check_usage_date_ranges(self) -> None: with PerfTimer() as timer: try: results = self.query(SnowflakeQuery.get_access_history_date_range()) @@ -477,7 +479,7 @@ def _process_snowflake_history_row( f"Failed to parse operation history row {event_dict}, {e}", ) - def parse_event_objects(self, event_dict): + def parse_event_objects(self, event_dict: Dict) -> None: event_dict["BASE_OBJECTS_ACCESSED"] = [ obj for obj in json.loads(event_dict["BASE_OBJECTS_ACCESSED"]) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index e561ed0e2d146..811ea67981e18 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -1165,7 +1165,7 @@ def gen_schema_metadata( foreign_keys: Optional[List[ForeignKeyConstraint]] = None if isinstance(table, SnowflakeTable) and len(table.foreign_keys) > 0: - foreign_keys = self.build_foreign_keys(table, dataset_urn, foreign_keys) + foreign_keys = self.build_foreign_keys(table, dataset_urn) schema_metadata = SchemaMetadata( schemaName=dataset_name, @@ -1211,7 +1211,9 @@ def gen_schema_metadata( return schema_metadata - def build_foreign_keys(self, table, dataset_urn, foreign_keys): + def build_foreign_keys( + self, table: SnowflakeTable, dataset_urn: str + ) -> List[ForeignKeyConstraint]: foreign_keys = [] for fk in table.foreign_keys: foreign_dataset = make_dataset_urn( @@ -1428,7 +1430,7 @@ def get_fk_constraints_for_table( # Access to table but none of its constraints - is this possible ? 
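        # Falling back to an empty list keeps ingestion going when no constraint
        # metadata was captured for this table.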
return constraints.get(table_name, []) - def add_config_to_report(self): + def add_config_to_report(self) -> None: self.report.cleaned_account_id = self.config.get_account() self.report.ignore_start_time_lineage = self.config.ignore_start_time_lineage self.report.upstream_lineage_in_report = self.config.upstream_lineage_in_report @@ -1481,7 +1483,9 @@ def inspect_session_metadata(self) -> None: # that would be expensive, hence not done. To compensale for possibility # of some null values in collected sample, we fetch extra (20% more) # rows than configured sample_size. - def get_sample_values_for_table(self, table_name, schema_name, db_name): + def get_sample_values_for_table( + self, table_name: str, schema_name: str, db_name: str + ) -> pd.DataFrame: # Create a cursor object. logger.debug( f"Collecting sample values for table {db_name}.{schema_name}.{table_name}" @@ -1562,7 +1566,7 @@ def get_snowsight_base_url(self) -> Optional[str]: ) return None - def is_standard_edition(self): + def is_standard_edition(self) -> bool: try: self.query(SnowflakeQuery.show_tags()) return False @@ -1571,7 +1575,7 @@ def is_standard_edition(self): return True raise - def _snowflake_clear_ocsp_cache(self): + def _snowflake_clear_ocsp_cache(self) -> None: # Because of some issues with the Snowflake Python connector, we wipe the OCSP cache. # # Why is this necessary: diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index ec0af37089b1d..4cc00a66116e9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -138,6 +138,7 @@ ) from datahub.utilities import config_clean from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, SqlParsingResult +from datahub.utilities.urns.dataset_urn import DatasetUrn logger: logging.Logger = logging.getLogger(__name__) @@ -504,7 +505,7 @@ def close(self) -> None: self.server = None super().close() - def _populate_usage_stat_registry(self): + def _populate_usage_stat_registry(self) -> None: if self.server is None: return @@ -645,7 +646,7 @@ def _init_workbook_registry(self) -> None: continue self.workbook_project_map[wb.id] = wb.project_id - def _populate_projects_registry(self): + def _populate_projects_registry(self) -> None: if self.server is None: return @@ -666,7 +667,7 @@ def _populate_projects_registry(self): f"Tableau workbooks {self.workbook_project_map}", ) - def _authenticate(self): + def _authenticate(self) -> None: try: self.server = self.config.make_tableau_client() logger.info("Authenticated to Tableau server") @@ -841,16 +842,14 @@ def _track_custom_sql_ids(self, field: dict) -> None: def _create_upstream_table_lineage( self, datasource: dict, - browse_path: str, + browse_path: Optional[str], is_embedded_ds: bool = False, ) -> Tuple: upstream_tables: List[Upstream] = [] fine_grained_lineages: List[FineGrainedLineage] = [] table_id_to_urn = {} - upstream_datasources = self.get_upstream_datasources( - datasource, upstream_tables - ) + upstream_datasources = self.get_upstream_datasources(datasource) upstream_tables.extend(upstream_datasources) # When tableau workbook connects to published datasource, it creates an embedded @@ -875,7 +874,7 @@ def _create_upstream_table_lineage( # This adds an edge to upstream CustomSQLTables using `fields`.`upstreamColumns`.`table` csql_upstreams, csql_id_to_urn = self.get_upstream_csql_tables( - datasource.get(tableau_constant.FIELDS), + 
datasource.get(tableau_constant.FIELDS) or [], ) upstream_tables.extend(csql_upstreams) table_id_to_urn.update(csql_id_to_urn) @@ -914,7 +913,7 @@ def _create_upstream_table_lineage( return upstream_tables, fine_grained_lineages - def get_upstream_datasources(self, datasource, upstream_tables): + def get_upstream_datasources(self, datasource: dict) -> List[Upstream]: upstream_tables = [] for ds in datasource.get(tableau_constant.UPSTREAM_DATA_SOURCES, []): if ds[tableau_constant.ID] not in self.datasource_ids_being_used: @@ -933,14 +932,16 @@ def get_upstream_datasources(self, datasource, upstream_tables): upstream_tables.append(upstream_table) return upstream_tables - def get_upstream_csql_tables(self, fields): + def get_upstream_csql_tables( + self, fields: List[dict] + ) -> Tuple[List[Upstream], Dict[str, str]]: upstream_csql_urns = set() csql_id_to_urn = {} for field in fields: if not field.get(tableau_constant.UPSTREAM_COLUMNS): continue - for upstream_col in field.get(tableau_constant.UPSTREAM_COLUMNS): + for upstream_col in field[tableau_constant.UPSTREAM_COLUMNS]: if ( upstream_col and upstream_col.get(tableau_constant.TABLE) @@ -968,7 +969,13 @@ def get_upstream_csql_tables(self, fields): for csql_urn in upstream_csql_urns ], csql_id_to_urn - def get_upstream_tables(self, tables, datasource_name, browse_path, is_custom_sql): + def get_upstream_tables( + self, + tables: List[dict], + datasource_name: Optional[str], + browse_path: Optional[str], + is_custom_sql: bool, + ) -> Tuple[List[Upstream], Dict[str, str]]: upstream_tables = [] # Same table urn can be used when setting fine grained lineage, table_id_to_urn: Dict[str, str] = {} @@ -1056,12 +1063,12 @@ def get_upstream_tables(self, tables, datasource_name, browse_path, is_custom_sq def get_upstream_columns_of_fields_in_datasource( self, - datasource, - datasource_urn, - table_id_to_urn, - ): + datasource: dict, + datasource_urn: str, + table_id_to_urn: Dict[str, str], + ) -> List[FineGrainedLineage]: fine_grained_lineages = [] - for field in datasource.get(tableau_constant.FIELDS): + for field in datasource.get(tableau_constant.FIELDS) or []: field_name = field.get(tableau_constant.NAME) # upstreamColumns lineage will be set via upstreamFields. # such as for CalculatedField @@ -1086,9 +1093,14 @@ def get_upstream_columns_of_fields_in_datasource( and upstream_table_id and upstream_table_id in table_id_to_urn.keys() ): + parent_dataset_urn = table_id_to_urn[upstream_table_id] + if self.is_snowflake_urn(parent_dataset_urn): + # This is required for column level lineage to work correctly as + # DataHub Snowflake source lowercases all field names in the schema. 
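The platform check introduced here matters because DataHub's Snowflake source emits every schema field name in lowercase, so upstream column references must be folded to lowercase before the lineage edge is built. A minimal sketch of that gate, assuming the standard dataset URN layout and using plain string handling in place of the `DatasetUrn` helper the patch imports:

```python
def is_snowflake_urn(urn: str) -> bool:
    # e.g. urn:li:dataset:(urn:li:dataPlatform:snowflake,db.schema.table,PROD)
    platform = urn.split("urn:li:dataPlatform:", 1)[1].split(",", 1)[0]
    return platform == "snowflake"


def normalize_field_name(parent_urn: str, field_name: str) -> str:
    # Lowercase only for Snowflake-backed parents, to match the field names
    # that the DataHub Snowflake source registers in the schema aspect.
    return field_name.lower() if is_snowflake_urn(parent_urn) else field_name


assert normalize_field_name(
    "urn:li:dataset:(urn:li:dataPlatform:snowflake,db.sch.tbl,PROD)", "COL_A"
) == "col_a"
```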
+ name = name.lower() input_columns.append( builder.make_schema_field_urn( - parent_urn=table_id_to_urn[upstream_table_id], + parent_urn=parent_dataset_urn, field_path=name, ) ) @@ -1107,9 +1119,19 @@ def get_upstream_columns_of_fields_in_datasource( return fine_grained_lineages - def get_upstream_fields_of_field_in_datasource(self, datasource, datasource_urn): + def is_snowflake_urn(self, urn: str) -> bool: + return ( + DatasetUrn.create_from_string(urn) + .get_data_platform_urn() + .get_platform_name() + == "snowflake" + ) + + def get_upstream_fields_of_field_in_datasource( + self, datasource: dict, datasource_urn: str + ) -> List[FineGrainedLineage]: fine_grained_lineages = [] - for field in datasource.get(tableau_constant.FIELDS): + for field in datasource.get(tableau_constant.FIELDS) or []: field_name = field.get(tableau_constant.NAME) # It is observed that upstreamFields gives one-hop field # lineage, and not multi-hop field lineage @@ -1205,7 +1227,7 @@ def get_upstream_fields_from_custom_sql( return fine_grained_lineages - def get_transform_operation(self, field): + def get_transform_operation(self, field: dict) -> str: field_type = field[tableau_constant.TYPE_NAME] if field_type in ( tableau_constant.DATA_SOURCE_FIELD, @@ -1381,7 +1403,7 @@ def get_schema_metadata_for_custom_sql( ) return schema_metadata - def _get_published_datasource_project_luid(self, ds): + def _get_published_datasource_project_luid(self, ds: dict) -> Optional[str]: # This is fallback in case "get all datasources" query fails for some reason. # It is possible due to https://github.com/tableau/server-client-python/issues/1210 if ( @@ -1430,7 +1452,7 @@ def _query_published_datasource_for_project_luid(self, ds_luid: str) -> None: ) logger.debug("Error stack trace", exc_info=True) - def _get_workbook_project_luid(self, wb): + def _get_workbook_project_luid(self, wb: dict) -> Optional[str]: if wb.get(tableau_constant.LUID) and self.workbook_project_map.get( wb[tableau_constant.LUID] ): @@ -1440,11 +1462,12 @@ def _get_workbook_project_luid(self, wb): return None - def _get_embedded_datasource_project_luid(self, ds): + def _get_embedded_datasource_project_luid(self, ds: dict) -> Optional[str]: if ds.get(tableau_constant.WORKBOOK): project_luid: Optional[str] = self._get_workbook_project_luid( - ds.get(tableau_constant.WORKBOOK) + ds[tableau_constant.WORKBOOK] ) + if project_luid and project_luid in self.tableau_project_registry: return project_luid @@ -1454,7 +1477,7 @@ def _get_embedded_datasource_project_luid(self, ds): return None - def _get_datasource_project_luid(self, ds): + def _get_datasource_project_luid(self, ds: dict) -> Optional[str]: # Only published and embedded data-sources are supported ds_type: Optional[str] = ds.get(tableau_constant.TYPE_NAME) if ds_type not in ( @@ -1486,7 +1509,7 @@ def _get_datasource_project_name(ds: dict) -> Optional[str]: return ds.get(tableau_constant.PROJECT_NAME) return None - def _get_project_browse_path_name(self, ds): + def _get_project_browse_path_name(self, ds: dict) -> Optional[str]: if self.config.extract_project_hierarchy is False: # backward compatibility. 
Just return the name of datasource project return self._get_datasource_project_name(ds) @@ -1494,9 +1517,8 @@ def _get_project_browse_path_name(self, ds: dict) -> Optional[str]: # form path as per nested project structure project_luid = self._get_datasource_project_luid(ds) if project_luid is None: - datasource_name: str = ds.get(tableau_constant.NAME) logger.warning( - f"Could not load project hierarchy for datasource {datasource_name}. Please check permissions." + f"Could not load project hierarchy for datasource {ds.get(tableau_constant.NAME)}. Please check permissions." ) logger.debug(f"datasource = {ds}") return None @@ -1509,7 +1531,7 @@ def _create_lineage_to_upstream_tables( # This adds an edge to upstream DatabaseTables using `upstreamTables` upstream_tables, _ = self.get_upstream_tables( tables, - datasource.get(tableau_constant.NAME), + datasource.get(tableau_constant.NAME) or "", self._get_project_browse_path_name(datasource), is_custom_sql=True, ) @@ -1541,7 +1563,6 @@ def parse_custom_sql( ] ], ) -> Optional["SqlParsingResult"]: - database_info = datasource.get(tableau_constant.DATABASE) or {} if datasource.get(tableau_constant.IS_UNSUPPORTED_CUSTOM_SQL) in (None, False): @@ -1593,7 +1614,6 @@ def parse_custom_sql( def _create_lineage_from_unsupported_csql( self, csql_urn: str, csql: dict ) -> Iterable[MetadataWorkUnit]: - parsed_result = self.parse_custom_sql( datasource=csql, datasource_urn=csql_urn, @@ -1812,7 +1832,9 @@ def emit_datasource( def get_custom_props_from_dict(self, obj: dict, keys: List[str]) -> Optional[dict]: return {key: str(obj[key]) for key in keys if obj.get(key)} or None - def _get_datasource_container_key(self, datasource, workbook, is_embedded_ds): + def _get_datasource_container_key( + self, datasource: dict, workbook: Optional[dict], is_embedded_ds: bool + ) -> Optional[ContainerKey]: container_key: Optional[ContainerKey] = None if is_embedded_ds: # If it is embedded, then the parent container is the workbook if workbook is not None: @@ -2072,20 +2094,8 @@ def emit_sheets_as_charts( if wu is not None: yield wu - project_luid: Optional[str] = self._get_workbook_project_luid(workbook) - - if ( - workbook is not None - and project_luid - and project_luid in self.tableau_project_registry - and workbook.get(tableau_constant.NAME) - ): - browse_paths = BrowsePathsClass( - paths=[ - f"/{self.platform}/{self._project_luid_to_browse_path_name(project_luid)}" - f"/{workbook[tableau_constant.NAME].replace('/', REPLACE_SLASH_CHAR)}" - ] - ) + browse_paths = self.get_browse_paths_aspect(workbook) + if browse_paths: chart_snapshot.aspects.append(browse_paths) else: logger.warning( @@ -2221,7 +2231,7 @@ def gen_workbook_key(self, workbook_id: str) -> WorkbookKey: workbook_id=workbook_id, ) - def gen_project_key(self, project_luid): + def gen_project_key(self, project_luid: str) -> ProjectKey: return ProjectKey( platform=self.platform, instance=self.config.platform_instance, @@ -2378,32 +2388,8 @@ def emit_dashboard( if wu is not None: yield wu - project_luid: Optional[str] = self._get_workbook_project_luid(workbook) - if ( - workbook is not None - and project_luid - and project_luid in self.tableau_project_registry - and workbook.get(tableau_constant.NAME) - ): - browse_paths = BrowsePathsClass( - paths=[ - f"/{self.platform}/{self._project_luid_to_browse_path_name(project_luid)}" - f"/{workbook[tableau_constant.NAME].replace('/', REPLACE_SLASH_CHAR)}" - ] - ) - dashboard_snapshot.aspects.append(browse_paths) - elif ( - workbook is not None - and workbook.get(tableau_constant.PROJECT_NAME) - and 

workbook.get(tableau_constant.NAME) - ): - # browse path - browse_paths = BrowsePathsClass( - paths=[ - f"/{self.platform}/{workbook[tableau_constant.PROJECT_NAME].replace('/', REPLACE_SLASH_CHAR)}" - f"/{workbook[tableau_constant.NAME].replace('/', REPLACE_SLASH_CHAR)}" - ] - ) + browse_paths = self.get_browse_paths_aspect(workbook) + if browse_paths: dashboard_snapshot.aspects.append(browse_paths) else: logger.warning( @@ -2432,6 +2418,31 @@ def emit_dashboard( dashboard_snapshot.urn, ) + def get_browse_paths_aspect( + self, workbook: Optional[Dict] + ) -> Optional[BrowsePathsClass]: + browse_paths: Optional[BrowsePathsClass] = None + if workbook and workbook.get(tableau_constant.NAME): + project_luid: Optional[str] = self._get_workbook_project_luid(workbook) + if project_luid in self.tableau_project_registry: + browse_paths = BrowsePathsClass( + paths=[ + f"/{self.platform}/{self._project_luid_to_browse_path_name(project_luid)}" + f"/{workbook[tableau_constant.NAME].replace('/', REPLACE_SLASH_CHAR)}" + ] + ) + + elif workbook.get(tableau_constant.PROJECT_NAME): + # browse path + browse_paths = BrowsePathsClass( + paths=[ + f"/{self.platform}/{workbook[tableau_constant.PROJECT_NAME].replace('/', REPLACE_SLASH_CHAR)}" + f"/{workbook[tableau_constant.NAME].replace('/', REPLACE_SLASH_CHAR)}" + ] + ) + + return browse_paths + def emit_embedded_datasources(self) -> Iterable[MetadataWorkUnit]: datasource_filter = f"{tableau_constant.ID_WITH_IN}: {json.dumps(self.embedded_datasource_ids_being_used)}" From e680a97046c9ccf595dcd84bca12c5425b7bba4a Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Thu, 7 Sep 2023 01:18:11 +0530 Subject: [PATCH 07/20] fix(ingest/bigquery): fix partition and median queries for profiling (#8778) --- .../source/bigquery_v2/bigquery_report.py | 10 +++- .../source/bigquery_v2/bigquery_schema.py | 5 +- .../ingestion/source/bigquery_v2/profiler.py | 30 ++++++----- .../ingestion/source/ge_data_profiler.py | 51 ++++++++++++++----- .../tests/unit/test_bigquery_profiler.py | 8 +-- .../tests/unit/test_bigquery_source.py | 11 ++++ 6 files changed, 82 insertions(+), 33 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py index 8c46d8f675259..b2251fbb8ab1f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py @@ -61,7 +61,15 @@ class BigQueryV2Report(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowR partition_info: Dict[str, str] = field(default_factory=TopKDict) profile_table_selection_criteria: Dict[str, str] = field(default_factory=TopKDict) selected_profile_tables: Dict[str, List[str]] = field(default_factory=TopKDict) - invalid_partition_ids: Dict[str, str] = field(default_factory=TopKDict) + profiling_skipped_invalid_partition_ids: Dict[str, str] = field( + default_factory=TopKDict + ) + profiling_skipped_invalid_partition_type: Dict[str, str] = field( + default_factory=TopKDict + ) + profiling_skipped_partition_profiling_disabled: List[str] = field( + default_factory=LossyList + ) allow_pattern: Optional[str] = None deny_pattern: Optional[str] = None num_usage_workunits_emitted: int = 0 diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py index 
f8256f8e6fed6..47a04c545231b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py @@ -72,7 +72,7 @@ def from_range_partitioning( return cls( field=field, - type="RANGE", + type=RANGE_PARTITION_NAME, ) @classmethod @@ -151,6 +151,9 @@ class BigqueryQuery: """ # https://cloud.google.com/bigquery/docs/information-schema-table-storage?hl=en + # Note for max_partition_id - + # should we instead pick the partition with latest LAST_MODIFIED_TIME ? + # for range partitioning max may not be latest partition tables_for_dataset = f""" SELECT t.table_catalog as table_catalog, diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py index c9dcb4fe35c3f..b3e88459917b3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py @@ -94,7 +94,7 @@ def generate_partition_profiler_query( partition_where_clause: str if table.partition_info.type == RANGE_PARTITION_NAME: - if table.partition_info and table.partition_info.column: + if table.partition_info.column: partition_where_clause = ( f"{table.partition_info.column.name} >= {partition}" ) @@ -102,6 +102,9 @@ logger.warning( f"Partitioned table {table.name} without partition column" ) + self.report.profiling_skipped_invalid_partition_ids[ + f"{project}.{schema}.{table.name}" + ] = partition return None, None else: logger.debug( @@ -118,8 +121,8 @@ logger.error( f"Unable to get partition range for partition id: {partition} it failed with exception {e}" ) - self.report.invalid_partition_ids[ - f"{schema}.{table.name}" + self.report.profiling_skipped_invalid_partition_ids[ + f"{project}.{schema}.{table.name}" ] = partition return None, None @@ -132,11 +135,14 @@ partition_column_name = table.partition_info.column.name partition_data_type = table.partition_info.column.data_type if table.partition_info.type in ("HOUR", "DAY", "MONTH", "YEAR"): - partition_where_clause = f"{partition_data_type}(`{partition_column_name}`) BETWEEN {partition_data_type}('{partition_datetime}') AND {partition_data_type}('{upper_bound_partition_datetime}')" + partition_where_clause = f"`{partition_column_name}` BETWEEN {partition_data_type}('{partition_datetime}') AND {partition_data_type}('{upper_bound_partition_datetime}')" else: logger.warning( f"Not supported partition type {table.partition_info.type}" ) + self.report.profiling_skipped_invalid_partition_type[ + f"{project}.{schema}.{table.name}" + ] = table.partition_info.type return None, None custom_sql = """ SELECT @@ -153,7 +159,7 @@ ) return (partition, custom_sql) - if table.max_shard_id: + elif table.max_shard_id: # For sharded table we want to get the partition id but not needed to generate custom query return table.max_shard_id, None @@ -162,15 +168,9 @@ def get_workunits( self, project_id: str, tables: Dict[str, List[BigqueryTable]] ) -> Iterable[MetadataWorkUnit]: - # Otherwise, if column level profiling is enabled, use GE profiler. 

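The profiler hunk above reshapes the partition filter: the bare partition column is compared against typed boundary literals instead of being wrapped in a `TIMESTAMP(...)` conversion, which is the form the updated test goldens later in this patch assert on. A rough sketch of the clause construction, with a hypothetical helper name:

```python
from datetime import datetime, timedelta


def partition_where_clause(
    column: str, data_type: str, start: datetime, end: datetime
) -> str:
    # Compare the raw column against typed literals; the old query shape
    # wrapped the column itself in a conversion function instead.
    return f"`{column}` BETWEEN {data_type}('{start}') AND {data_type}('{end}')"


start = datetime(2020, 1, 1)
print(partition_where_clause("date", "TIMESTAMP", start, start + timedelta(days=1)))
# `date` BETWEEN TIMESTAMP('2020-01-01 00:00:00') AND TIMESTAMP('2020-01-02 00:00:00')
```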
- if not self.config.project_id_pattern.allowed(project_id): - return profile_requests = [] for dataset in tables: - if not self.config.schema_pattern.allowed(dataset): - continue - for table in tables[dataset]: normalized_table_name = BigqueryTableIdentifier( project_id=project_id, dataset=dataset, table=table.name @@ -253,17 +253,16 @@ def get_bigquery_profile_request( if self.config.profiling.report_dropped_profiles: self.report.report_dropped(f"profile of {dataset_name}") return None + (partition, custom_sql) = self.generate_partition_profiler_query( project, dataset, table, self.config.profiling.partition_datetime ) - if partition is None and table.partition_info: self.report.report_warning( - "profile skipped as partitioned table is empty or partition id was invalid", + "profile skipped as partitioned table is empty or partition id or type was invalid", dataset_name, ) return None - if ( partition is not None and not self.config.profiling.partition_profiling_enabled @@ -271,6 +270,9 @@ def get_bigquery_profile_request( logger.debug( f"{dataset_name} and partition {partition} is skipped because profiling.partition_profiling_enabled property is disabled" ) + self.report.profiling_skipped_partition_profiling_disabled.append( + dataset_name + ) return None self.report.report_entity_profiled(dataset_name) diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index 6faa29f264d36..4394d108486be 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -5,6 +5,7 @@ import contextlib import dataclasses import functools +import json import logging import threading import traceback @@ -27,7 +28,7 @@ import sqlalchemy as sa import sqlalchemy.sql.compiler from great_expectations.core.util import convert_to_json_serializable -from great_expectations.data_context import BaseDataContext +from great_expectations.data_context import AbstractDataContext, BaseDataContext from great_expectations.data_context.types.base import ( DataContextConfig, DatasourceConfig, @@ -55,6 +56,7 @@ DatasetProfileClass, HistogramClass, PartitionSpecClass, + PartitionTypeClass, QuantileClass, ValueFrequencyClass, ) @@ -70,6 +72,12 @@ logger: logging.Logger = logging.getLogger(__name__) P = ParamSpec("P") +POSTGRESQL = "postgresql" +MYSQL = "mysql" +SNOWFLAKE = "snowflake" +BIGQUERY = "bigquery" +REDSHIFT = "redshift" +TRINO = "trino" # The reason for this wacky structure is quite fun. 
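The `ge_data_profiler.py` hunks below key dialect-specific SQL off these new constants, including a BigQuery median fix. BigQuery only allows `PERCENTILE_CONT` as an analytic function, so the patch computes the median with `APPROX_QUANTILES(col, 2)`, which returns `[min, median, max]`, and picks the middle element with `OFFSET(1)`. A sketch of the statement shape, assuming SQLAlchemy's textual API and a hypothetical helper name:

```python
import sqlalchemy as sa


def bigquery_median_select(table_name: str, column: str) -> sa.sql.Select:
    # approx_quantiles(col, 2) -> [min, median, max]; OFFSET(1) is the median.
    return sa.select(
        sa.text(f"approx_quantiles(`{column}`, 2)[OFFSET(1)]")
    ).select_from(sa.table(table_name))


print(bigquery_median_select("tbl", "amount"))
```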
GE basically assumes that # the config structures were generated directly from YML and further assumes that @@ -113,14 +121,14 @@ class GEProfilerRequest: def get_column_unique_count_patch(self: SqlAlchemyDataset, column: str) -> int: - if self.engine.dialect.name.lower() == "redshift": + if self.engine.dialect.name.lower() == REDSHIFT: element_values = self.engine.execute( sa.select( [sa.text(f'APPROXIMATE count(distinct "{column}")')] # type:ignore ).select_from(self._table) ) return convert_to_json_serializable(element_values.fetchone()[0]) - elif self.engine.dialect.name.lower() == "bigquery": + elif self.engine.dialect.name.lower() == BIGQUERY: element_values = self.engine.execute( sa.select( [ @@ -131,7 +139,7 @@ def get_column_unique_count_patch(self: SqlAlchemyDataset, column: str) -> int: ).select_from(self._table) ) return convert_to_json_serializable(element_values.fetchone()[0]) - elif self.engine.dialect.name.lower() == "snowflake": + elif self.engine.dialect.name.lower() == SNOWFLAKE: element_values = self.engine.execute( sa.select(sa.func.APPROX_COUNT_DISTINCT(sa.column(column))).select_from( self._table @@ -361,7 +369,7 @@ def _get_column_cardinality( def _get_dataset_rows(self, dataset_profile: DatasetProfileClass) -> None: if self.config.profile_table_row_count_estimate_only: dialect_name = self.dataset.engine.dialect.name.lower() - if dialect_name == "postgresql": + if dialect_name == POSTGRESQL: schema_name = self.dataset_name.split(".")[1] table_name = self.dataset_name.split(".")[2] logger.debug( @@ -370,7 +378,7 @@ def _get_dataset_rows(self, dataset_profile: DatasetProfileClass) -> None: get_estimate_script = sa.text( f"SELECT c.reltuples AS estimate FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE c.relname = '{table_name}' AND n.nspname = '{schema_name}'" ) - elif dialect_name == "mysql": + elif dialect_name == MYSQL: schema_name = self.dataset_name.split(".")[0] table_name = self.dataset_name.split(".")[1] logger.debug( @@ -421,7 +429,7 @@ def _get_dataset_column_median( if not self.config.include_field_median_value: return try: - if self.dataset.engine.dialect.name.lower() == "snowflake": + if self.dataset.engine.dialect.name.lower() == SNOWFLAKE: column_profile.median = str( self.dataset.engine.execute( sa.select([sa.func.median(sa.column(column))]).select_from( @@ -429,6 +437,14 @@ def _get_dataset_column_median( ) ).scalar() ) + elif self.dataset.engine.dialect.name.lower() == BIGQUERY: + column_profile.median = str( + self.dataset.engine.execute( + sa.select( + sa.text(f"approx_quantiles(`{column}`, 2) [OFFSET (1)]") + ).select_from(self.dataset._table) + ).scalar() + ) else: column_profile.median = str(self.dataset.get_column_median(column)) except Exception as e: @@ -583,6 +599,13 @@ def generate_dataset_profile( # noqa: C901 (complexity) profile = DatasetProfileClass(timestampMillis=get_sys_time()) if self.partition: profile.partitionSpec = PartitionSpecClass(partition=self.partition) + elif self.config.limit and self.config.offset: + profile.partitionSpec = PartitionSpecClass( + type=PartitionTypeClass.QUERY, + partition=json.dumps( + dict(limit=self.config.limit, offset=self.config.offset) + ), + ) profile.fieldProfiles = [] self._get_dataset_rows(profile) @@ -717,7 +740,7 @@ def generate_dataset_profile( # noqa: C901 (complexity) @dataclasses.dataclass class GEContext: - data_context: BaseDataContext + data_context: AbstractDataContext datasource_name: str @@ -935,7 +958,7 @@ def _generate_single_profile( } bigquery_temp_table: 
Optional[str] = None - if platform == "bigquery" and ( + if platform == BIGQUERY and ( custom_sql or self.config.limit or self.config.offset ): # On BigQuery, we need to bypass GE's mechanism for creating temporary tables because @@ -950,6 +973,8 @@ def _generate_single_profile( ) if custom_sql is not None: # Note that limit and offset are not supported for custom SQL. + # Presence of custom SQL represents that the bigquery table + # is either partitioned or sharded bq_sql = custom_sql else: bq_sql = f"SELECT * FROM `{table}`" @@ -1015,7 +1040,7 @@ def _generate_single_profile( finally: raw_connection.close() - if platform == "bigquery": + if platform == BIGQUERY: if bigquery_temp_table: ge_config["table"] = bigquery_temp_table ge_config["schema"] = None @@ -1066,7 +1091,7 @@ def _generate_single_profile( self.report.report_warning(pretty_name, f"Profiling exception {e}") return None finally: - if self.base_engine.engine.name == "trino": + if self.base_engine.engine.name == TRINO: self._drop_trino_temp_table(batch) def _get_ge_dataset( @@ -1103,7 +1128,7 @@ def _get_ge_dataset( **batch_kwargs, }, ) - if platform is not None and platform == "bigquery": + if platform == BIGQUERY: # This is done as GE makes the name as DATASET.TABLE # but we want it to be PROJECT.DATASET.TABLE instead for multi-project setups name_parts = pretty_name.split(".") @@ -1124,7 +1149,7 @@ def _get_ge_dataset( # Stringified types are used to avoid dialect specific import errors @lru_cache(maxsize=1) def _get_column_types_to_ignore(dialect_name: str) -> List[str]: - if dialect_name.lower() == "postgresql": + if dialect_name.lower() == POSTGRESQL: return ["JSON"] return [] diff --git a/metadata-ingestion/tests/unit/test_bigquery_profiler.py b/metadata-ingestion/tests/unit/test_bigquery_profiler.py index 44ce5f0a02e37..fb5133b24474c 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_profiler.py +++ b/metadata-ingestion/tests/unit/test_bigquery_profiler.py @@ -64,7 +64,7 @@ def test_generate_day_partitioned_partition_profiler_query(): FROM `test_project.test_dataset.test_table` WHERE - TIMESTAMP(`date`) BETWEEN TIMESTAMP('2020-01-01 00:00:00') AND TIMESTAMP('2020-01-02 00:00:00') + `date` BETWEEN TIMESTAMP('2020-01-01 00:00:00') AND TIMESTAMP('2020-01-02 00:00:00') """.strip() assert "20200101" == query[0] @@ -107,7 +107,7 @@ def test_generate_day_partitioned_partition_profiler_query_with_set_partition_ti FROM `test_project.test_dataset.test_table` WHERE - TIMESTAMP(`date`) BETWEEN TIMESTAMP('2020-01-01 00:00:00') AND TIMESTAMP('2020-01-02 00:00:00') + `date` BETWEEN TIMESTAMP('2020-01-01 00:00:00') AND TIMESTAMP('2020-01-02 00:00:00') """.strip() assert "20200101" == query[0] @@ -150,7 +150,7 @@ def test_generate_hour_partitioned_partition_profiler_query(): FROM `test_project.test_dataset.test_table` WHERE - TIMESTAMP(`partition_column`) BETWEEN TIMESTAMP('2020-01-01 03:00:00') AND TIMESTAMP('2020-01-01 04:00:00') + `partition_column` BETWEEN TIMESTAMP('2020-01-01 03:00:00') AND TIMESTAMP('2020-01-01 04:00:00') """.strip() assert "2020010103" == query[0] @@ -183,7 +183,7 @@ def test_generate_ingestion_partitioned_partition_profiler_query(): FROM `test_project.test_dataset.test_table` WHERE - TIMESTAMP(`_PARTITIONTIME`) BETWEEN TIMESTAMP('2020-01-01 00:00:00') AND TIMESTAMP('2020-01-02 00:00:00') + `_PARTITIONTIME` BETWEEN TIMESTAMP('2020-01-01 00:00:00') AND TIMESTAMP('2020-01-02 00:00:00') """.strip() assert "20200101" == query[0] diff --git a/metadata-ingestion/tests/unit/test_bigquery_source.py 
b/metadata-ingestion/tests/unit/test_bigquery_source.py index 47418d9a989bb..6907f926249f5 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_source.py +++ b/metadata-ingestion/tests/unit/test_bigquery_source.py @@ -132,6 +132,17 @@ def test_get_projects_with_project_ids_overrides_project_id_pattern(): ] +def test_platform_instance_config_always_none(): + config = BigQueryV2Config.parse_obj( + {"include_data_platform_instance": True, "platform_instance": "something"} + ) + assert config.platform_instance is None + + config = BigQueryV2Config(platform_instance="something", project_id="project_id") + assert config.project_id == "project_id" + assert config.platform_instance is None + + def test_get_dataplatform_instance_aspect_returns_project_id(): project_id = "project_id" expected_instance = ( From f353bc7f4494b5c4e77f2dcba6b5c9d8bef8eee5 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 6 Sep 2023 12:49:29 -0700 Subject: [PATCH 08/20] docs: add datahub source to integrations page (#8787) --- docs-website/filterTagIndexes.json | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs-website/filterTagIndexes.json b/docs-website/filterTagIndexes.json index a86262731fc9a..c154b586fe66e 100644 --- a/docs-website/filterTagIndexes.json +++ b/docs-website/filterTagIndexes.json @@ -88,6 +88,17 @@ "Features": "Stateful Ingestion, Column Level Lineage, UI Ingestion, Status Aspect" } }, + { + "Path": "docs/generated/ingestion/sources/datahub", + "imgPath": "img/acryl-logo-light-mark.png", + "Title": "DataHub", + "Description": "Integrate your open source DataHub instance with Acryl Cloud or other on-prem DataHub instances", + "tags": { + "Platform Type": "Metadata", + "Connection Type": "Pull", + "Features": "Stateful Ingestion" + } + }, { "Path": "docs/generated/ingestion/sources/dbt", "imgPath": "img/logos/platforms/dbt.svg", From 4ffad4d9b91c25d9f8380fba7d81f65fedfd188c Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 6 Sep 2023 12:49:44 -0700 Subject: [PATCH 09/20] chore(ingest): upgrade sqlglot fork (#8775) Co-authored-by: Tamas Nemeth --- metadata-ingestion/setup.py | 2 +- .../goldens/test_bigquery_unnest_columns.json | 42 ++++--------------- .../goldens/test_create_view_as_select.json | 4 +- .../goldens/test_select_with_ctes.json | 8 ++-- 4 files changed, 14 insertions(+), 42 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 32e1cf926cc68..123617ac70ebb 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -136,7 +136,7 @@ def get_long_description(): sqlglot_lib = { # Using an Acryl fork of sqlglot. 
# https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1 - "acryl-sqlglot==16.7.6.dev6", + "acryl-sqlglot==18.0.2.dev15", } aws_common = { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_unnest_columns.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_unnest_columns.json index 69eb6f4ea631b..4660f44b6d744 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_unnest_columns.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_unnest_columns.json @@ -1,16 +1,8 @@ { "query_type": "SELECT", "in_tables": [ - { - "database": "bq-proj", - "db_schema": "dataset", - "table": "table1" - }, - { - "database": "bq-proj", - "db_schema": "dataset", - "table": "table2" - } + "urn:li:dataset:(urn:li:dataPlatform:bigquery,bq-proj.dataset.table1,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:bigquery,bq-proj.dataset.table2,PROD)" ], "out_tables": [], "column_lineage": [ @@ -21,11 +13,7 @@ }, "upstreams": [ { - "table": { - "database": "bq-proj", - "db_schema": "dataset", - "table": "table1" - }, + "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,bq-proj.dataset.table1,PROD)", "column": "reporting_day" } ] @@ -37,19 +25,11 @@ }, "upstreams": [ { - "table": { - "database": "bq-proj", - "db_schema": "dataset", - "table": "table1" - }, + "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,bq-proj.dataset.table1,PROD)", "column": "by_product.product_code" }, { - "table": { - "database": "bq-proj", - "db_schema": "dataset", - "table": "table2" - }, + "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,bq-proj.dataset.table2,PROD)", "column": "other_field" } ] @@ -61,11 +41,7 @@ }, "upstreams": [ { - "table": { - "database": "bq-proj", - "db_schema": "dataset", - "table": "table2" - }, + "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,bq-proj.dataset.table2,PROD)", "column": "other_field" } ] @@ -77,11 +53,7 @@ }, "upstreams": [ { - "table": { - "database": "bq-proj", - "db_schema": "dataset", - "table": "table1" - }, + "table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,bq-proj.dataset.table1,PROD)", "column": "by_product.product_code_dau" } ] diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json index 22bb78dc86dd9..1ca56840531e4 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json @@ -15,7 +15,7 @@ "upstreams": [ { "table": "urn:li:dataset:(urn:li:dataPlatform:oracle,scott.emp,PROD)", - "column": "deptno" + "column": "DEPTNO" } ] }, @@ -34,7 +34,7 @@ "upstreams": [ { "table": "urn:li:dataset:(urn:li:dataPlatform:oracle,scott.emp,PROD)", - "column": "sal" + "column": "SAL" } ] } diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json index 4647b27934a10..3e02314d6e8c3 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json @@ -9,24 +9,24 @@ { "downstream": { "table": null, - "column": "col1" + "column": "COL1" }, "upstreams": [ { "table": "urn:li:dataset:(urn:li:dataPlatform:oracle,table1,PROD)", - "column": "col1" + "column": "COL1" } ] }, { "downstream": { "table": null, - "column": "col3" + 
"column": "COL3" }, "upstreams": [ { "table": "urn:li:dataset:(urn:li:dataPlatform:oracle,table2,PROD)", - "column": "col3" + "column": "COL3" } ] } From 6a1a1dd256ff51291f883af743e6efb059505ae2 Mon Sep 17 00:00:00 2001 From: Jeff Merrick Date: Wed, 6 Sep 2023 22:14:47 -0500 Subject: [PATCH 10/20] docs: minor fix on versioning navbar and dropdown (#8790) --- .../docs/_components/SearchBar/index.jsx | 87 +++++++++---------- .../_components/SearchBar/search.module.scss | 27 +++--- .../DocsVersionDropdownNavbarItem.js | 70 +++++++++++++++ 3 files changed, 126 insertions(+), 58 deletions(-) create mode 100644 docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js diff --git a/docs-website/src/pages/docs/_components/SearchBar/index.jsx b/docs-website/src/pages/docs/_components/SearchBar/index.jsx index 4f26d758bd5de..e3b61fb3cb476 100644 --- a/docs-website/src/pages/docs/_components/SearchBar/index.jsx +++ b/docs-website/src/pages/docs/_components/SearchBar/index.jsx @@ -18,8 +18,8 @@ import ExecutionEnvironment from "@docusaurus/ExecutionEnvironment"; import { usePluralForm, useEvent } from "@docusaurus/theme-common"; import useDocusaurusContext from "@docusaurus/useDocusaurusContext"; import { useAllDocsData } from "@docusaurus/plugin-content-docs/client"; -import { useSearchQueryString } from '@docusaurus/theme-common'; -import {useTitleFormatter} from '@docusaurus/theme-common/internal'; +import { useSearchQueryString } from "@docusaurus/theme-common"; +import { useTitleFormatter } from "@docusaurus/theme-common/internal"; import Translate, { translate } from "@docusaurus/Translate"; import styles from "./search.module.scss"; @@ -48,7 +48,7 @@ function useDocsSearchVersionsHelpers() { // docsPluginId -> versionName map const [searchVersions, setSearchVersions] = useState(() => { return Object.entries(allDocsData).reduce((acc, [pluginId, pluginData]) => { - return { ...acc, [pluginId]: pluginData.versions[0].name }; + return { ...acc, [pluginId]: pluginData.versions?.[1].name }; }, {}); }); @@ -70,11 +70,11 @@ const SearchVersionSelectList = ({ docsSearchVersionsHelpers }) => { const versionedPluginEntries = Object.entries(docsSearchVersionsHelpers.allDocsData) // Do not show a version select for unversioned docs plugin instances .filter(([, docsData]) => docsData.versions.length > 1); - return ( -
+ <> {versionedPluginEntries.map(([pluginId, docsData]) => { const labelPrefix = versionedPluginEntries.length > 1 ? `${pluginId}: ` : ""; + return ( ); })} -
+ ); }; @@ -274,46 +274,41 @@ function SearchBar() { return (
-
e.preventDefault()} className={styles.searchForm}> - setSearchQuery(e.target.value)} - value={searchQuery} - autoComplete="off" - autoFocus - /> - - - -
- {docsSearchVersionsHelpers.versioningEnabled && ( - - )} -
- {!!searchResultState.totalResults && - documentsFoundPlural(searchResultState.totalResults)} +
+
e.preventDefault()} className={styles.searchForm}> + setSearchQuery(e.target.value)} + value={searchQuery} + autoComplete="off" + autoFocus + /> + + + +
+ {docsSearchVersionsHelpers.versioningEnabled && }
+
{!!searchResultState.totalResults && documentsFoundPlural(searchResultState.totalResults)}
{searchResultState.items.length > 0 ? (
@@ -375,4 +370,4 @@ function SearchBar() { ); } -export default SearchBar; \ No newline at end of file +export default SearchBar; diff --git a/docs-website/src/pages/docs/_components/SearchBar/search.module.scss b/docs-website/src/pages/docs/_components/SearchBar/search.module.scss index 30a2973384ba6..6faaf19c7e603 100644 --- a/docs-website/src/pages/docs/_components/SearchBar/search.module.scss +++ b/docs-website/src/pages/docs/_components/SearchBar/search.module.scss @@ -5,9 +5,14 @@ * LICENSE file in the root directory of this source tree. */ +.searchHeader { + display: flex; + gap: 0.8rem; +} + .searchForm { position: relative; - + flex: 1; &:focus + svg { color: red; } @@ -23,11 +28,16 @@ .searchQueryInput { padding: 0.8rem 0.8rem 0.8rem 3rem; + width: 100%; } .searchVersionInput { - padding: 0.8rem 2rem 0.8rem 2rem; - text-align: center; + padding: 0.8rem 2rem 0.8rem 0.8rem; + appearance: none; + background-image: url("data:image/svg+xml;charset=UTF-8,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='none' stroke='currentColor' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3e%3cpolyline points='6 9 12 15 18 9'%3e%3c/polyline%3e%3c/svg%3e"); + background-repeat: no-repeat; + background-position: right 0.6rem center; + background-size: 1em; } .searchQueryInput, @@ -36,14 +46,13 @@ border-style: solid; border-color: transparent; font: var(--ifm-font-size-base) var(--ifm-font-family-base); - width: 100%; - background: var(--docsearch-searchbox-background); + background-color: var(--docsearch-searchbox-background); color: var(--docsearch-text-color); margin-bottom: 0.5rem; transition: border var(--ifm-transition-fast) ease; &:focus { - background: var(--ifm-background-color); + background-color: var(--ifm-background-color); + svg { color: var(--docsearch-primary-color); @@ -99,11 +108,6 @@ } @media only screen and (max-width: 996px) { - .searchVersionColumn { - max-width: 40% !important; - margin: auto; - } - .searchResultsColumn { max-width: 60% !important; } @@ -130,7 +134,6 @@ margin: auto; } - .loadingSpinner { width: 3rem; height: 3rem; diff --git a/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js b/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js new file mode 100644 index 0000000000000..cc04ab23d3cf3 --- /dev/null +++ b/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js @@ -0,0 +1,70 @@ +import React from "react"; +import { useVersions, useActiveDocContext } from "@docusaurus/plugin-content-docs/client"; +import { useDocsPreferredVersion } from "@docusaurus/theme-common"; +import { useDocsVersionCandidates } from "@docusaurus/theme-common/internal"; +import { translate } from "@docusaurus/Translate"; +import { useLocation } from "@docusaurus/router"; +import DefaultNavbarItem from "@theme/NavbarItem/DefaultNavbarItem"; +import DropdownNavbarItem from "@theme/NavbarItem/DropdownNavbarItem"; +const getVersionMainDoc = (version) => version.docs.find((doc) => doc.id === version.mainDocId); +export default function DocsVersionDropdownNavbarItem({ + mobile, + docsPluginId, + dropdownActiveClassDisabled, + dropdownItemsBefore, + dropdownItemsAfter, + ...props +}) { + const { search, hash } = useLocation(); + const activeDocContext = useActiveDocContext(docsPluginId); + const versions = useVersions(docsPluginId); + const { savePreferredVersionName } = useDocsPreferredVersion(docsPluginId); + const versionLinks = versions.map((version) => { + // We try to link to the same doc, in another version + // 
When not possible, fallback to the "main doc" of the version + const versionDoc = activeDocContext.alternateDocVersions[version.name] ?? getVersionMainDoc(version); + return { + label: version.label, + // preserve ?search#hash suffix on version switches + to: `${versionDoc.path}${search}${hash}`, + isActive: () => version === activeDocContext.activeVersion, + onClick: () => savePreferredVersionName(version.name), + }; + }); + const items = [...dropdownItemsBefore, ...versionLinks, ...dropdownItemsAfter]; + const dropdownVersion = useDocsVersionCandidates(docsPluginId)[0]; + // Mobile dropdown is handled a bit differently + const dropdownLabel = + mobile && items.length > 1 + ? translate({ + id: "theme.navbar.mobileVersionsDropdown.label", + message: "Versions", + description: "The label for the navbar versions dropdown on mobile view", + }) + : dropdownVersion.label; + const dropdownTo = mobile && items.length > 1 ? undefined : getVersionMainDoc(dropdownVersion).path; + // We don't want to render a version dropdown with 0 or 1 item. If we build + // the site with a single docs version (onlyIncludeVersions: ['1.0.0']), + // We'd rather render a button instead of a dropdown + if (items.length <= 1) { + return ( + false : undefined} + /> + ); + } + return ( + false : undefined} + /> + ); +} From 0e8000cf18eac507156f56d0e5a8d428114f6710 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 7 Sep 2023 11:32:28 -0700 Subject: [PATCH 11/20] feat(ingest): drop sql_metadata parser (#8765) --- metadata-ingestion/setup.py | 1 - .../src/datahub/utilities/sql_parser.py | 56 -------------- .../tests/unit/test_utilities.py | 74 +------------------ 3 files changed, 4 insertions(+), 127 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 123617ac70ebb..d8668e8925546 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -303,7 +303,6 @@ def get_long_description(): # TODO: I doubt we need all three sql parsing libraries. *sqllineage_lib, *sqlglot_lib, - "sql_metadata", "sqlalchemy-bigquery>=1.4.1", "google-cloud-datacatalog-lineage==0.2.2", }, diff --git a/metadata-ingestion/src/datahub/utilities/sql_parser.py b/metadata-ingestion/src/datahub/utilities/sql_parser.py index 6b1a94ba69657..61693b52b350f 100644 --- a/metadata-ingestion/src/datahub/utilities/sql_parser.py +++ b/metadata-ingestion/src/datahub/utilities/sql_parser.py @@ -1,7 +1,5 @@ -import contextlib import logging import multiprocessing -import re import traceback from multiprocessing import Process, Queue from typing import Any, List, Optional, Tuple @@ -9,63 +7,9 @@ from datahub.utilities.sql_lineage_parser_impl import SqlLineageSQLParserImpl from datahub.utilities.sql_parser_base import SQLParser -with contextlib.suppress(ImportError): - from sql_metadata import Parser as MetadataSQLParser logger = logging.getLogger(__name__) -class MetadataSQLSQLParser(SQLParser): - _DATE_SWAP_TOKEN = "__d_a_t_e" - - def __init__(self, sql_query: str, use_external_process: bool = True) -> None: - super().__init__(sql_query, use_external_process) - - original_sql_query = sql_query - - # MetadataSQLParser makes mistakes on lateral flatten queries, use the prefix - if "lateral flatten" in sql_query: - sql_query = sql_query[: sql_query.find("lateral flatten")] - - # MetadataSQLParser also makes mistakes on columns called "date", rename them - sql_query = re.sub(r"\sdate\s", f" {self._DATE_SWAP_TOKEN} ", sql_query) - - # MetadataSQLParser does not handle "encode" directives well. 
Remove them - sql_query = re.sub(r"\sencode [a-zA-Z]*", "", sql_query) - - if sql_query != original_sql_query: - logger.debug(f"rewrote original query {original_sql_query} as {sql_query}") - - self._parser = MetadataSQLParser(sql_query) - - def get_tables(self) -> List[str]: - result = self._parser.tables - # Sort tables to make the list deterministic - result.sort() - return result - - def get_columns(self) -> List[str]: - columns_dict = self._parser.columns_dict - # don't attempt to parse columns if there are joins involved - if columns_dict.get("join", {}) != {}: - return [] - - columns_alias_dict = self._parser.columns_aliases_dict - filtered_cols = [ - c - for c in columns_dict.get("select", {}) - if c != "NULL" and not isinstance(c, list) - ] - if columns_alias_dict is not None: - for col_alias in columns_alias_dict.get("select", []): - if col_alias in self._parser.columns_aliases: - col_name = self._parser.columns_aliases[col_alias] - filtered_cols = [ - col_alias if c == col_name else c for c in filtered_cols - ] - # swap back renamed date column - return ["date" if c == self._DATE_SWAP_TOKEN else c for c in filtered_cols] - - def sql_lineage_parser_impl_func_wrapper( queue: Optional[multiprocessing.Queue], sql_query: str, use_raw_names: bool = False ) -> Optional[Tuple[List[str], List[str], Any]]: diff --git a/metadata-ingestion/tests/unit/test_utilities.py b/metadata-ingestion/tests/unit/test_utilities.py index 32b5a6401ded6..368cedfe48040 100644 --- a/metadata-ingestion/tests/unit/test_utilities.py +++ b/metadata-ingestion/tests/unit/test_utilities.py @@ -1,5 +1,5 @@ from datahub.utilities.delayed_iter import delayed_iter -from datahub.utilities.sql_parser import MetadataSQLSQLParser, SqlLineageSQLParser +from datahub.utilities.sql_parser import SqlLineageSQLParser def test_delayed_iter(): @@ -36,18 +36,10 @@ def maker(n): ] -def test_metadatasql_sql_parser_get_tables_from_simple_query(): - sql_query = "SELECT foo.a, foo.b, bar.c FROM foo JOIN bar ON (foo.a == bar.b);" - - tables_list = MetadataSQLSQLParser(sql_query).get_tables() - tables_list.sort() - assert tables_list == ["bar", "foo"] - - def test_sqllineage_sql_parser_get_tables_from_simple_query(): sql_query = "SELECT foo.a, foo.b, bar.c FROM foo JOIN bar ON (foo.a == bar.b);" - tables_list = MetadataSQLSQLParser(sql_query).get_tables() + tables_list = SqlLineageSQLParser(sql_query).get_tables() tables_list.sort() assert tables_list == ["bar", "foo"] @@ -121,7 +113,7 @@ def test_sqllineage_sql_parser_get_columns_from_simple_query(): assert columns_list == ["a", "b"] -def test_metadatasql_sql_parser_get_columns_with_alias_and_count_star(): +def test_sqllineage_sql_parser_get_columns_with_alias_and_count_star(): sql_query = "SELECT foo.a, foo.b, bar.c as test, count(*) as count FROM foo JOIN bar ON (foo.a == bar.b);" columns_list = SqlLineageSQLParser(sql_query).get_columns() @@ -129,7 +121,7 @@ def test_metadatasql_sql_parser_get_columns_with_alias_and_count_star(): assert columns_list == ["a", "b", "count", "test"] -def test_metadatasql_sql_parser_get_columns_with_more_complex_join(): +def test_sqllineage_sql_parser_get_columns_with_more_complex_join(): sql_query = """ INSERT INTO @@ -206,21 +198,6 @@ def test_sqllineage_sql_parser_get_columns_complex_query_with_union(): assert columns_list == ["c", "date", "e", "u", "x"] -def test_metadatasql_sql_parser_get_tables_from_templated_query(): - sql_query = """ - SELECT - country, - city, - timestamp, - measurement - FROM - ${my_view.SQL_TABLE_NAME} AS my_view -""" - 
tables_list = MetadataSQLSQLParser(sql_query).get_tables() - tables_list.sort() - assert tables_list == ["my_view.SQL_TABLE_NAME"] - - def test_sqllineage_sql_parser_get_tables_from_templated_query(): sql_query = """ SELECT @@ -236,21 +213,6 @@ def test_sqllineage_sql_parser_get_tables_from_templated_query(): assert tables_list == ["my_view.SQL_TABLE_NAME"] -def test_metadatasql_sql_parser_get_columns_from_templated_query(): - sql_query = """ - SELECT - country, - city, - timestamp, - measurement - FROM - ${my_view.SQL_TABLE_NAME} AS my_view -""" - columns_list = MetadataSQLSQLParser(sql_query).get_columns() - columns_list.sort() - assert columns_list == ["city", "country", "measurement", "timestamp"] - - def test_sqllineage_sql_parser_get_columns_from_templated_query(): sql_query = """ SELECT @@ -277,34 +239,6 @@ def test_sqllineage_sql_parser_with_weird_lookml_query(): assert columns_list == ["aliased_platform", "country", "date"] -def test_metadatasql_sql_parser_with_weird_lookml_query(): - sql_query = """ - SELECT date DATE, - platform VARCHAR(20) AS aliased_platform, - country VARCHAR(20) FROM fragment_derived_view' - """ - columns_list = MetadataSQLSQLParser(sql_query).get_columns() - columns_list.sort() - assert columns_list == ["aliased_platform", "country", "date"] - - -def test_metadatasql_sql_parser_tables_from_redash_query(): - sql_query = """SELECT -name, -SUM(quantity * list_price * (1 - discount)) AS total, -YEAR(order_date) as order_year -FROM -`orders` o -INNER JOIN `order_items` i ON i.order_id = o.order_id -INNER JOIN `staffs` s ON s.staff_id = o.staff_id -GROUP BY -name, -year(order_date)""" - table_list = MetadataSQLSQLParser(sql_query).get_tables() - table_list.sort() - assert table_list == ["order_items", "orders", "staffs"] - - def test_sqllineage_sql_parser_tables_from_redash_query(): sql_query = """SELECT name, From eb4107a6e36a77c524b195834b4dbe77c19d432d Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 7 Sep 2023 11:32:41 -0700 Subject: [PATCH 12/20] fix(ingest): drop `wrap_aspect_as_workunit` method (#8766) --- .../src/datahub/emitter/mcp_builder.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index 40df214f49433..844a29f1c78a3 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -2,7 +2,6 @@ import json from typing import Any, Dict, Iterable, List, Optional, TypeVar -from deprecated import deprecated from pydantic.fields import Field from pydantic.main import BaseModel @@ -30,7 +29,6 @@ StatusClass, SubTypesClass, TagAssociationClass, - _Aspect, ) @@ -176,23 +174,6 @@ def add_tags_to_entity_wu( ).as_workunit() -@deprecated("use MetadataChangeProposalWrapper(...).as_workunit() instead") -def wrap_aspect_as_workunit( - entityName: str, - entityUrn: str, - aspectName: str, - aspect: _Aspect, -) -> MetadataWorkUnit: - wu = MetadataWorkUnit( - id=f"{aspectName}-for-{entityUrn}", - mcp=MetadataChangeProposalWrapper( - entityUrn=entityUrn, - aspect=aspect, - ), - ) - return wu - - def gen_containers( container_key: KeyType, name: str, From cf166843b562ae1df303e4b250b95a9bfe736539 Mon Sep 17 00:00:00 2001 From: Indy Prentice Date: Thu, 7 Sep 2023 17:29:55 -0300 Subject: [PATCH 13/20] feat(search): Also de-duplicate the field queries based on field names (#8788) Co-authored-by: Indy Prentice Co-authored-by: David Leifker --- 
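The Java change below de-duplicates search field configurations when multiple entity specs contribute the same field name: the grouped configs keep one entry per name, the boost becomes the average across entities, and each boolean subfield capability is OR'd together. A rough sketch of that merge rule, written in Python for brevity, with simplified tuples standing in for `SearchFieldConfig`:

```python
from collections import defaultdict
from typing import Dict, List, Tuple


def merge_fields(
    fields: List[Tuple[str, float, bool]]
) -> Dict[str, Tuple[float, bool]]:
    # (field_name, boost, has_delimited_subfield) triples; the merge averages
    # boosts and ORs the capability flag across entity specs.
    grouped: Dict[str, List[Tuple[float, bool]]] = defaultdict(list)
    for name, boost, has_delimited in fields:
        grouped[name].append((boost, has_delimited))
    return {
        name: (
            sum(boost for boost, _ in cfgs) / len(cfgs),  # averaged boost
            any(flag for _, flag in cfgs),  # enabled if any entity enables it
        )
        for name, cfgs in grouped.items()
    }


print(merge_fields([("name", 10.0, True), ("name", 2.0, False), ("urn", 10.0, True)]))
# {'name': (6.0, True), 'urn': (10.0, True)}
```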
.../query/request/SearchQueryBuilder.java | 139 ++++++++----- .../query/request/SearchQueryBuilderTest.java | 185 ++++++++++++++++-- 2 files changed, 258 insertions(+), 66 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java index a00882cfde240..4eebf02d70e9e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.search.elasticsearch.query.request; +import com.google.common.annotations.VisibleForTesting; import com.linkedin.metadata.config.search.ExactMatchConfiguration; import com.linkedin.metadata.config.search.PartialConfiguration; import com.linkedin.metadata.config.search.SearchConfiguration; @@ -19,6 +20,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -116,11 +118,8 @@ private QueryBuilder buildInternalQuery(@Nullable QueryConfiguration customQuery QueryStringQueryBuilder queryBuilder = QueryBuilders.queryStringQuery(withoutQueryPrefix); queryBuilder.defaultOperator(Operator.AND); - entitySpecs.stream() - .map(this::getStandardFields) - .flatMap(Set::stream) - .distinct() - .forEach(cfg -> queryBuilder.field(cfg.fieldName(), cfg.boost())); + getStandardFields(entitySpecs).forEach(entitySpec -> + queryBuilder.field(entitySpec.fieldName(), entitySpec.boost())); finalQuery.should(queryBuilder); if (exactMatchConfiguration.isEnableStructured()) { getPrefixAndExactMatchQuery(null, entitySpecs, withoutQueryPrefix).ifPresent(finalQuery::should); @@ -130,16 +129,47 @@ private QueryBuilder buildInternalQuery(@Nullable QueryConfiguration customQuery return finalQuery; } - private Set getStandardFields(@Nonnull EntitySpec entitySpec) { + /** + * Gets searchable fields from all entities in the input collection. De-duplicates fields across entities. + * @param entitySpecs: Entity specs to extract searchable fields from + * @return A set of SearchFieldConfigs containing the searchable fields from the input entities. 
+ */ + @VisibleForTesting + Set getStandardFields(@Nonnull Collection entitySpecs) { Set fields = new HashSet<>(); - // Always present final float urnBoost = Float.parseFloat((String) PRIMARY_URN_SEARCH_PROPERTIES.get("boostScore")); fields.add(SearchFieldConfig.detectSubFieldType("urn", urnBoost, SearchableAnnotation.FieldType.URN, true)); fields.add(SearchFieldConfig.detectSubFieldType("urn.delimited", urnBoost * partialConfiguration.getUrnFactor(), - SearchableAnnotation.FieldType.URN, true)); + SearchableAnnotation.FieldType.URN, true)); + + entitySpecs.stream() + .map(this::getFieldsFromEntitySpec) + .flatMap(Set::stream) + .collect(Collectors.groupingBy(SearchFieldConfig::fieldName)).forEach((key, value) -> + fields.add( + new SearchFieldConfig( + key, + value.get(0).shortName(), + (float) value.stream().mapToDouble(SearchFieldConfig::boost).average().getAsDouble(), + value.get(0).analyzer(), + value.stream().anyMatch(SearchFieldConfig::hasKeywordSubfield), + value.stream().anyMatch(SearchFieldConfig::hasDelimitedSubfield), + value.stream().anyMatch(SearchFieldConfig::hasWordGramSubfields), + true, + value.stream().anyMatch(SearchFieldConfig::isDelimitedSubfield), + value.stream().anyMatch(SearchFieldConfig::isKeywordSubfield), + value.stream().anyMatch(SearchFieldConfig::isWordGramSubfield) + )) + ); + return fields; + } + + @VisibleForTesting + Set getFieldsFromEntitySpec(EntitySpec entitySpec) { + Set fields = new HashSet<>(); List searchableFieldSpecs = entitySpec.getSearchableFieldSpecs(); for (SearchableFieldSpec fieldSpec : searchableFieldSpecs) { if (!fieldSpec.getSearchableAnnotation().isQueryByDefault()) { @@ -153,8 +183,8 @@ private Set getStandardFields(@Nonnull EntitySpec entitySpec) final SearchableAnnotation searchableAnnotation = fieldSpec.getSearchableAnnotation(); fields.add(SearchFieldConfig.detectSubFieldType(searchFieldConfig.fieldName() + ".delimited", - searchFieldConfig.boost() * partialConfiguration.getFactor(), - searchableAnnotation.getFieldType(), searchableAnnotation.isQueryByDefault())); + searchFieldConfig.boost() * partialConfiguration.getFactor(), + searchableAnnotation.getFieldType(), searchableAnnotation.isQueryByDefault())); if (SearchFieldConfig.detectSubFieldType(fieldSpec).hasWordGramSubfields()) { fields.add(SearchFieldConfig.builder() @@ -187,6 +217,20 @@ private Set getStandardFields(@Nonnull EntitySpec entitySpec) } } } + return fields; + } + + private Set getStandardFields(@Nonnull EntitySpec entitySpec) { + Set fields = new HashSet<>(); + + // Always present + final float urnBoost = Float.parseFloat((String) PRIMARY_URN_SEARCH_PROPERTIES.get("boostScore")); + + fields.add(SearchFieldConfig.detectSubFieldType("urn", urnBoost, SearchableAnnotation.FieldType.URN, true)); + fields.add(SearchFieldConfig.detectSubFieldType("urn.delimited", urnBoost * partialConfiguration.getUrnFactor(), + SearchableAnnotation.FieldType.URN, true)); + + fields.addAll(getFieldsFromEntitySpec(entitySpec)); return fields; } @@ -255,49 +299,42 @@ private Optional getPrefixAndExactMatchQuery(@Nullable QueryConfig BoolQueryBuilder finalQuery = QueryBuilders.boolQuery(); String unquotedQuery = unquote(query); - entitySpecs.stream() - .map(this::getStandardFields) - .flatMap(Set::stream) - .filter(SearchFieldConfig::isQueryByDefault) - .forEach(searchFieldConfig -> { - - if (searchFieldConfig.isDelimitedSubfield() && isPrefixQuery) { - finalQuery.should(QueryBuilders.matchPhrasePrefixQuery(searchFieldConfig.fieldName(), query) - .boost(searchFieldConfig.boost() - * 
exactMatchConfiguration.getPrefixFactor() - * exactMatchConfiguration.getCaseSensitivityFactor()) - .queryName(searchFieldConfig.shortName())); // less than exact - } - - if (searchFieldConfig.isKeyword() && isExactQuery) { - // It is important to use the subfield .keyword (it uses a different normalizer) - // The non-.keyword field removes case information - - // Exact match case-sensitive - finalQuery.should(QueryBuilders - .termQuery(ESUtils.toKeywordField(searchFieldConfig.fieldName(), false), unquotedQuery) - .caseInsensitive(false) - .boost(searchFieldConfig.boost() - * exactMatchConfiguration.getExactFactor()) - .queryName(searchFieldConfig.shortName())); - - // Exact match case-insensitive - finalQuery.should(QueryBuilders - .termQuery(ESUtils.toKeywordField(searchFieldConfig.fieldName(), false), unquotedQuery) - .caseInsensitive(true) - .boost(searchFieldConfig.boost() - * exactMatchConfiguration.getExactFactor() - * exactMatchConfiguration.getCaseSensitivityFactor()) - .queryName(searchFieldConfig.fieldName())); - } - - if (searchFieldConfig.isWordGramSubfield() && isPrefixQuery) { - finalQuery.should(QueryBuilders - .matchPhraseQuery(ESUtils.toKeywordField(searchFieldConfig.fieldName(), false), unquotedQuery) + getStandardFields(entitySpecs).forEach(searchFieldConfig -> { + if (searchFieldConfig.isDelimitedSubfield() && isPrefixQuery) { + finalQuery.should(QueryBuilders.matchPhrasePrefixQuery(searchFieldConfig.fieldName(), query) + .boost(searchFieldConfig.boost() * exactMatchConfiguration.getPrefixFactor() + * exactMatchConfiguration.getCaseSensitivityFactor()) + .queryName(searchFieldConfig.shortName())); // less than exact + } + + if (searchFieldConfig.isKeyword() && isExactQuery) { + // It is important to use the subfield .keyword (it uses a different normalizer) + // The non-.keyword field removes case information + + // Exact match case-sensitive + finalQuery.should( + QueryBuilders.termQuery(ESUtils.toKeywordField(searchFieldConfig.fieldName(), false), unquotedQuery) + .caseInsensitive(false) + .boost(searchFieldConfig.boost() * exactMatchConfiguration.getExactFactor()) + .queryName(searchFieldConfig.shortName())); + + // Exact match case-insensitive + finalQuery.should( + QueryBuilders.termQuery(ESUtils.toKeywordField(searchFieldConfig.fieldName(), false), unquotedQuery) + .caseInsensitive(true) + .boost(searchFieldConfig.boost() * exactMatchConfiguration.getExactFactor() + * exactMatchConfiguration.getCaseSensitivityFactor()) + .queryName(searchFieldConfig.fieldName())); + } + + if (searchFieldConfig.isWordGramSubfield() && isPrefixQuery) { + finalQuery.should( + QueryBuilders.matchPhraseQuery(ESUtils.toKeywordField(searchFieldConfig.fieldName(), false), + unquotedQuery) .boost(searchFieldConfig.boost() * getWordGramFactor(searchFieldConfig.fieldName())) .queryName(searchFieldConfig.shortName())); - } - }); + } + }); return finalQuery.should().size() > 0 ? 
Optional.of(finalQuery) : Optional.empty(); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java index 282b1d8bb6778..8e73b0ceeae8d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java @@ -1,5 +1,8 @@ package com.linkedin.metadata.search.elasticsearch.query.request; +import com.linkedin.data.schema.DataSchema; +import com.linkedin.data.schema.PathSpec; +import com.linkedin.metadata.ESTestConfiguration; import com.linkedin.metadata.config.search.CustomConfiguration; import com.linkedin.metadata.config.search.ExactMatchConfiguration; import com.linkedin.metadata.config.search.PartialConfiguration; @@ -10,11 +13,18 @@ import com.google.common.collect.ImmutableList; import com.linkedin.metadata.TestEntitySpecBuilder; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.SearchableFieldSpec; +import com.linkedin.metadata.models.annotation.SearchableAnnotation; import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; +import java.util.stream.Stream; +import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.util.Pair; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.MatchAllQueryBuilder; @@ -25,8 +35,14 @@ import org.elasticsearch.index.query.SimpleQueryStringBuilder; import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; +import org.mockito.Mockito; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.annotations.Test; +import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.AUTO_COMPLETE_ENTITY_TYPES; +import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.SEARCHABLE_ENTITY_TYPES; import static com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder.TEXT_SEARCH_ANALYZER; import static com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder.URN_SEARCH_ANALYZER; import static org.testng.Assert.assertEquals; @@ -34,7 +50,12 @@ import static org.testng.Assert.assertNull; import static org.testng.Assert.assertTrue; -public class SearchQueryBuilderTest { +@Import(ESTestConfiguration.class) +public class SearchQueryBuilderTest extends AbstractTestNGSpringContextTests { + + @Autowired + private EntityRegistry entityRegistry; + public static SearchConfiguration testQueryConfig; static { testQueryConfig = new SearchConfiguration(); @@ -66,8 +87,8 @@ public class SearchQueryBuilderTest { @Test public void testQueryBuilderFulltext() { FunctionScoreQueryBuilder result = - (FunctionScoreQueryBuilder) TEST_BUILDER.buildQuery(ImmutableList.of(TestEntitySpecBuilder.getSpec()), "testQuery", - true); + (FunctionScoreQueryBuilder) TEST_BUILDER.buildQuery(ImmutableList.of(TestEntitySpecBuilder.getSpec()), "testQuery", + true); BoolQueryBuilder mainQuery = (BoolQueryBuilder) result.query(); List shouldQueries = mainQuery.should(); 
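    // The two top-level "should" clauses are the analyzer-based simple-query group and the prefix/exact-match group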
     assertEquals(shouldQueries.size(), 2);
@@ -80,15 +101,15 @@ public void testQueryBuilderFulltext() {
     Map<String, Float> keywordFields = keywordQuery.fields();
     assertEquals(keywordFields.size(), 9);
     assertEquals(keywordFields, Map.of(
-        "urn", 10.f,
-        "textArrayField", 1.0f,
-        "customProperties", 1.0f,
-        "wordGramField", 1.0f,
-        "nestedArrayArrayField", 1.0f,
-        "textFieldOverride", 1.0f,
-        "nestedArrayStringField", 1.0f,
-        "keyPart1", 10.0f,
-        "esObjectField", 1.0f
+            "urn", 10.f,
+            "textArrayField", 1.0f,
+            "customProperties", 1.0f,
+            "wordGramField", 1.0f,
+            "nestedArrayArrayField", 1.0f,
+            "textFieldOverride", 1.0f,
+            "nestedArrayStringField", 1.0f,
+            "keyPart1", 10.0f,
+            "esObjectField", 1.0f
     ));

     SimpleQueryStringBuilder urnComponentQuery = (SimpleQueryStringBuilder) analyzerGroupQuery.should().get(1);
@@ -153,8 +174,8 @@ public void testQueryBuilderFulltext() {
   @Test
   public void testQueryBuilderStructured() {
     FunctionScoreQueryBuilder result =
-        (FunctionScoreQueryBuilder) TEST_BUILDER.buildQuery(ImmutableList.of(TestEntitySpecBuilder.getSpec()),
-            "testQuery", false);
+            (FunctionScoreQueryBuilder) TEST_BUILDER.buildQuery(ImmutableList.of(TestEntitySpecBuilder.getSpec()),
+                "testQuery", false);
     BoolQueryBuilder mainQuery = (BoolQueryBuilder) result.query();
     List<QueryBuilder> shouldQueries = mainQuery.should();
     assertEquals(shouldQueries.size(), 2);
@@ -263,4 +284,138 @@ public void testCustomDefault() {
       assertEquals(termQueryBuilder.value().toString(), triggerQuery);
     }
   }
-}
+
+  /**
+   * Tests to make sure that the fields are correctly combined across searchable entities
+   */
+  @Test
+  public void testGetStandardFieldsEntitySpec() {
+    List<EntitySpec> entitySpecs = Stream.concat(SEARCHABLE_ENTITY_TYPES.stream(), AUTO_COMPLETE_ENTITY_TYPES.stream())
+        .map(entityType -> entityType.toString().toLowerCase().replaceAll("_", ""))
+        .map(entityRegistry::getEntitySpec)
+        .collect(Collectors.toList());
+    assertTrue(entitySpecs.size() > 30, "Expected at least 30 searchable entities in the registry");
+
+    // Collect the distinct field names
+    Set<String> expectedFieldNames = Stream.concat(
+            // Standard urn fields plus entitySpec sourced fields
+            Stream.of("urn", "urn.delimited"),
+            entitySpecs.stream()
+                .flatMap(spec -> TEST_CUSTOM_BUILDER.getFieldsFromEntitySpec(spec).stream())
+                .map(SearchFieldConfig::fieldName))
+        .collect(Collectors.toSet());
+
+    Set<String> actualFieldNames = TEST_CUSTOM_BUILDER.getStandardFields(entitySpecs).stream()
+        .map(SearchFieldConfig::fieldName)
+        .collect(Collectors.toSet());
+
+    assertEquals(actualFieldNames, expectedFieldNames,
+        String.format("Missing: %s Extra: %s",
+            expectedFieldNames.stream().filter(f -> !actualFieldNames.contains(f)).collect(Collectors.toSet()),
+            actualFieldNames.stream().filter(f -> !expectedFieldNames.contains(f)).collect(Collectors.toSet())));
+  }
+
+  @Test
+  public void testGetStandardFields() {
+    Set<SearchFieldConfig> fieldConfigs = TEST_CUSTOM_BUILDER.getStandardFields(ImmutableList.of(TestEntitySpecBuilder.getSpec()));
+    assertEquals(fieldConfigs.size(), 21);
+    assertEquals(fieldConfigs.stream().map(SearchFieldConfig::fieldName).collect(Collectors.toSet()), Set.of(
+        "nestedArrayArrayField",
+        "esObjectField",
+        "foreignKey",
+        "keyPart1",
+        "nestedForeignKey",
+        "textArrayField.delimited",
+        "nestedArrayArrayField.delimited",
+        "wordGramField.delimited",
+        "wordGramField.wordGrams4",
+        "textFieldOverride",
+        "nestedArrayStringField.delimited",
+        "urn.delimited",
+        "textArrayField",
+        "keyPart1.delimited",
+        "nestedArrayStringField",
+        "wordGramField",
+        "customProperties",
"wordGramField.wordGrams3", + "textFieldOverride.delimited", + "urn", + "wordGramField.wordGrams2")); + + assertEquals(fieldConfigs.stream().filter(field -> field.fieldName().equals("keyPart1")).findFirst().map(SearchFieldConfig::boost), Optional.of( + 10.0F)); + assertEquals(fieldConfigs.stream().filter(field -> field.fieldName().equals("nestedForeignKey")).findFirst().map(SearchFieldConfig::boost), Optional.of( + 1.0F)); + assertEquals(fieldConfigs.stream().filter(field -> field.fieldName().equals("textFieldOverride")).findFirst().map(SearchFieldConfig::boost), Optional.of( + 1.0F)); + + EntitySpec mockEntitySpec = Mockito.mock(EntitySpec.class); + Mockito.when(mockEntitySpec.getSearchableFieldSpecs()).thenReturn(List.of( + new SearchableFieldSpec( + Mockito.mock(PathSpec.class), + new SearchableAnnotation("fieldDoesntExistInOriginal", + SearchableAnnotation.FieldType.TEXT, + true, true, false, false, + Optional.empty(), Optional.empty(), 13.0, + Optional.empty(), Optional.empty(), Map.of(), List.of()), + Mockito.mock(DataSchema.class)), + new SearchableFieldSpec( + Mockito.mock(PathSpec.class), + new SearchableAnnotation("keyPart1", + SearchableAnnotation.FieldType.KEYWORD, + true, true, false, false, + Optional.empty(), Optional.empty(), 20.0, + Optional.empty(), Optional.empty(), Map.of(), List.of()), + Mockito.mock(DataSchema.class)), + new SearchableFieldSpec( + Mockito.mock(PathSpec.class), + new SearchableAnnotation("textFieldOverride", + SearchableAnnotation.FieldType.WORD_GRAM, + true, true, false, false, + Optional.empty(), Optional.empty(), 3.0, + Optional.empty(), Optional.empty(), Map.of(), List.of()), + Mockito.mock(DataSchema.class))) + ); + + fieldConfigs = TEST_CUSTOM_BUILDER.getStandardFields(ImmutableList.of(TestEntitySpecBuilder.getSpec(), mockEntitySpec)); + // Same 21 from the original entity + newFieldNotInOriginal + 3 word gram fields from the textFieldOverride + assertEquals(fieldConfigs.size(), 26); + assertEquals(fieldConfigs.stream().map(SearchFieldConfig::fieldName).collect(Collectors.toSet()), Set.of( + "nestedArrayArrayField", + "esObjectField", + "foreignKey", + "keyPart1", + "nestedForeignKey", + "textArrayField.delimited", + "nestedArrayArrayField.delimited", + "wordGramField.delimited", + "wordGramField.wordGrams4", + "textFieldOverride", + "nestedArrayStringField.delimited", + "urn.delimited", + "textArrayField", + "keyPart1.delimited", + "nestedArrayStringField", + "wordGramField", + "customProperties", + "wordGramField.wordGrams3", + "textFieldOverride.delimited", + "urn", + "wordGramField.wordGrams2", + "fieldDoesntExistInOriginal", + "fieldDoesntExistInOriginal.delimited", + "textFieldOverride.wordGrams2", + "textFieldOverride.wordGrams3", + "textFieldOverride.wordGrams4")); + + // Field which only exists in first one: Should be the same + assertEquals(fieldConfigs.stream().filter(field -> field.fieldName().equals("nestedForeignKey")).findFirst().map(SearchFieldConfig::boost), Optional.of( + 1.0F)); + // Average boost value: 10 vs. 
20 -> 15 + assertEquals(fieldConfigs.stream().filter(field -> field.fieldName().equals("keyPart1")).findFirst().map(SearchFieldConfig::boost), Optional.of( + 15.0F)); + // Field which added word gram fields: Original boost should be boost value averaged + assertEquals(fieldConfigs.stream().filter(field -> field.fieldName().equals("textFieldOverride")).findFirst().map(SearchFieldConfig::boost), Optional.of( + 2.0F)); + } +} \ No newline at end of file From 56c00f55868cf25a4608006788dc98e707e88c94 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 7 Sep 2023 17:09:52 -0500 Subject: [PATCH 14/20] feat(openapi): entity endpoints & analytics raw (#8537) --- build.gradle | 6 +- buildSrc/build.gradle | 3 + .../linkedin/metadata/models/registry/config | 1 + .../GenerateJsonSchemaTask.java | 47 +- .../io/datahubproject/OpenApiEntities.java | 603 ++++++++++++++++++ datahub-frontend/conf/routes | 2 + .../types/dataset/mappers/DatasetMapper.java | 3 + .../models/registry/config/Entity.java | 5 + li-utils/build.gradle | 2 +- metadata-auth/auth-api/build.gradle | 2 +- .../java/datahub-client/build.gradle | 2 +- metadata-io/build.gradle | 28 - .../metadata/entity/EntityServiceImpl.java | 10 +- .../metadata/search/SearchService.java | 2 +- .../client/CachingEntitySearchService.java | 6 +- .../elasticsearch/ElasticSearchService.java | 9 +- .../elasticsearch/query/ESSearchDAO.java | 37 +- .../query/request/SearchRequestHandler.java | 2 +- .../metadata/search/utils/ESUtils.java | 4 +- metadata-models/build.gradle | 46 +- .../resources/JavaSpring/interface.mustache | 25 + .../main/resources/JavaSpring/model.mustache | 41 ++ .../main/resources/JavaSpring/pojo.mustache | 177 +++++ .../src/main/resources/JavaSpring/readme.txt | 2 + .../src/main/resources/entity-registry.yml | 6 + .../health/config/SpringWebConfig.java | 4 +- .../openapi-analytics-servlet/build.gradle | 67 ++ .../config/OpenapiAnalyticsConfig.java | 14 + .../delegates/DatahubUsageEventsImpl.java | 48 ++ .../JavaSpring/apiController.mustache | 160 +++++ .../src/main/resources/JavaSpring/readme.txt | 2 + .../src/main/resources/open-api.yaml | 31 + .../OpenAPIAnalyticsTestConfiguration.java | 48 ++ .../delegates/DatahubUsageEventsImplTest.java | 44 ++ .../openapi-entity-servlet/build.gradle | 82 +++ .../generators/build.gradle | 10 + .../datahubproject/CustomSpringCodegen.java | 43 ++ .../delegates/EntityApiDelegateImpl.java | 411 ++++++++++++ .../openapi/util/OpenApiEntitiesUtil.java | 279 ++++++++ .../main/resources/JavaSpring/api.mustache | 172 +++++ .../JavaSpring/apiController.mustache | 127 ++++ .../src/main/resources/JavaSpring/readme.txt | 2 + .../src/main/resources/entity-v2.0.yml | 6 + .../OpenAPIEntityTestConfiguration.java | 119 ++++ .../delegates/EntityApiDelegateImplTest.java | 203 ++++++ .../openapi/util/OpenApiEntitiesUtilTest.java | 55 ++ .../openapi/config/SpringWebConfig.java | 4 +- .../openapi/entities/EntitiesController.java | 4 +- .../openapi/util/MappingUtil.java | 260 +++++--- .../openapi/util/ReflectionCache.java | 138 ++++ .../java/entities/EntitiesControllerTest.java | 81 +-- .../schema-registry-api/build.gradle | 2 +- .../config/SpringWebSchemaRegistryConfig.java | 4 +- .../metadata/entity/EntityService.java | 10 +- metadata-service/war/build.gradle | 2 + .../war/src/main/resources/boot/policies.json | 6 +- .../authorization/PoliciesConfig.java | 6 + settings.gradle | 3 + 58 files changed, 3309 insertions(+), 209 deletions(-) create mode 120000 
buildSrc/src/main/java/com/linkedin/metadata/models/registry/config create mode 100644 buildSrc/src/main/java/io/datahubproject/OpenApiEntities.java create mode 100644 metadata-models/src/main/resources/JavaSpring/interface.mustache create mode 100644 metadata-models/src/main/resources/JavaSpring/model.mustache create mode 100644 metadata-models/src/main/resources/JavaSpring/pojo.mustache create mode 100644 metadata-models/src/main/resources/JavaSpring/readme.txt create mode 100644 metadata-service/openapi-analytics-servlet/build.gradle create mode 100644 metadata-service/openapi-analytics-servlet/src/main/java/io/datahubproject/openapi/config/OpenapiAnalyticsConfig.java create mode 100644 metadata-service/openapi-analytics-servlet/src/main/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImpl.java create mode 100644 metadata-service/openapi-analytics-servlet/src/main/resources/JavaSpring/apiController.mustache create mode 100644 metadata-service/openapi-analytics-servlet/src/main/resources/JavaSpring/readme.txt create mode 100644 metadata-service/openapi-analytics-servlet/src/main/resources/open-api.yaml create mode 100644 metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIAnalyticsTestConfiguration.java create mode 100644 metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImplTest.java create mode 100644 metadata-service/openapi-entity-servlet/build.gradle create mode 100644 metadata-service/openapi-entity-servlet/generators/build.gradle create mode 100644 metadata-service/openapi-entity-servlet/generators/src/main/java/io/datahubproject/CustomSpringCodegen.java create mode 100644 metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java create mode 100644 metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/util/OpenApiEntitiesUtil.java create mode 100644 metadata-service/openapi-entity-servlet/src/main/resources/JavaSpring/api.mustache create mode 100644 metadata-service/openapi-entity-servlet/src/main/resources/JavaSpring/apiController.mustache create mode 100644 metadata-service/openapi-entity-servlet/src/main/resources/JavaSpring/readme.txt create mode 100644 metadata-service/openapi-entity-servlet/src/main/resources/entity-v2.0.yml create mode 100644 metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java create mode 100644 metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/delegates/EntityApiDelegateImplTest.java create mode 100644 metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/util/OpenApiEntitiesUtilTest.java create mode 100644 metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/ReflectionCache.java diff --git a/build.gradle b/build.gradle index e12d520e12de6..1b6b82d51c2d4 100644 --- a/build.gradle +++ b/build.gradle @@ -146,7 +146,7 @@ project.ext.externalDependency = [ 'log4jApi': "org.apache.logging.log4j:log4j-api:$log4jVersion", 'log4j12Api': "org.slf4j:log4j-over-slf4j:$slf4jVersion", 'log4j2Api': "org.apache.logging.log4j:log4j-to-slf4j:$log4jVersion", - 'lombok': 'org.projectlombok:lombok:1.18.12', + 'lombok': 'org.projectlombok:lombok:1.18.16', 'mariadbConnector': 'org.mariadb.jdbc:mariadb-java-client:2.6.0', 'mavenArtifact': "org.apache.maven:maven-artifact:$mavenVersion", 'mixpanel': 
'com.mixpanel:mixpanel-java:1.4.4', @@ -187,7 +187,7 @@ project.ext.externalDependency = [ 'springBeans': "org.springframework:spring-beans:$springVersion", 'springContext': "org.springframework:spring-context:$springVersion", 'springCore': "org.springframework:spring-core:$springVersion", - 'springDocUI': 'org.springdoc:springdoc-openapi-ui:1.6.7', + 'springDocUI': 'org.springdoc:springdoc-openapi-ui:1.6.14', 'springJdbc': "org.springframework:spring-jdbc:$springVersion", 'springWeb': "org.springframework:spring-web:$springVersion", 'springWebMVC': "org.springframework:spring-webmvc:$springVersion", @@ -197,9 +197,11 @@ project.ext.externalDependency = [ 'springBootStarterWeb': "org.springframework.boot:spring-boot-starter-web:$springBootVersion", 'springBootStarterJetty': "org.springframework.boot:spring-boot-starter-jetty:$springBootVersion", 'springBootStarterCache': "org.springframework.boot:spring-boot-starter-cache:$springBootVersion", + 'springBootStarterValidation': "org.springframework.boot:spring-boot-starter-validation:$springBootVersion", 'springKafka': 'org.springframework.kafka:spring-kafka:2.8.11', 'springActuator': "org.springframework.boot:spring-boot-starter-actuator:$springBootVersion", 'swaggerAnnotations': 'io.swagger.core.v3:swagger-annotations:2.1.12', + 'swaggerCli': 'io.swagger.codegen.v3:swagger-codegen-cli:3.0.41', 'testng': 'org.testng:testng:7.3.0', 'testContainers': 'org.testcontainers:testcontainers:' + testContainersVersion, 'testContainersJunit': 'org.testcontainers:junit-jupiter:' + testContainersVersion, diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle index f88d2bdb966ce..65b3780431db9 100644 --- a/buildSrc/build.gradle +++ b/buildSrc/build.gradle @@ -13,4 +13,7 @@ dependencies { implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.5' implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.13.5' implementation 'commons-io:commons-io:2.11.0' + + compileOnly 'org.projectlombok:lombok:1.18.14' + annotationProcessor 'org.projectlombok:lombok:1.18.14' } \ No newline at end of file diff --git a/buildSrc/src/main/java/com/linkedin/metadata/models/registry/config b/buildSrc/src/main/java/com/linkedin/metadata/models/registry/config new file mode 120000 index 0000000000000..ea22cc67da2d4 --- /dev/null +++ b/buildSrc/src/main/java/com/linkedin/metadata/models/registry/config @@ -0,0 +1 @@ +../../../../../../../../../entity-registry/src/main/java/com/linkedin/metadata/models/registry/config \ No newline at end of file diff --git a/buildSrc/src/main/java/io/datahubproject/GenerateJsonSchemaTask.java b/buildSrc/src/main/java/io/datahubproject/GenerateJsonSchemaTask.java index 796d622860c15..25bf239ab835b 100644 --- a/buildSrc/src/main/java/io/datahubproject/GenerateJsonSchemaTask.java +++ b/buildSrc/src/main/java/io/datahubproject/GenerateJsonSchemaTask.java @@ -20,6 +20,7 @@ import java.util.HashSet; import java.util.List; import java.util.stream.Collectors; + import org.gradle.api.DefaultTask; import org.gradle.api.tasks.*; @@ -31,13 +32,27 @@ public class GenerateJsonSchemaTask extends DefaultTask { private String inputDirectory; private String outputDirectory; + private ArrayNode aspectType; private Path combinedDirectory; + private Path jsonDirectory; public static final String sep = FileSystems.getDefault().getSeparator(); private static final JsonNodeFactory NODE_FACTORY = JacksonUtils.nodeFactory(); + private static final OpenApiEntities openApiEntities = new OpenApiEntities(NODE_FACTORY); + + @InputFile + 
@PathSensitive(PathSensitivity.NAME_ONLY) + public String getEntityRegistryYaml() { + return openApiEntities.getEntityRegistryYaml(); + } + + public void setEntityRegistryYaml(String entityRegistryYaml) { + openApiEntities.setEntityRegistryYaml(entityRegistryYaml); + } + public void setInputDirectory(String inputDirectory) { this.inputDirectory = inputDirectory; } @@ -78,6 +93,7 @@ public void generate() throws IOException { .filter(Files::isRegularFile) .map(Path::toFile) .forEach(this::generateSchema); + List nodesList = Files.walk(jsonDirectory) .filter(Files::isRegularFile) .filter(path -> { @@ -108,6 +124,18 @@ public void generate() throws IOException { } schemasNode.setAll(definitions); }); + + combinedDirectory = Paths.get(outputDirectory + sep + "combined"); + try { + Files.createDirectory(combinedDirectory); + } catch (FileAlreadyExistsException fae) { + // No-op + } + + // Add additional components and paths + openApiEntities.setCombinedDirectory(combinedDirectory); + ObjectNode extendedNode = openApiEntities.entityExtension(nodesList, schemasNode); + /* Minimal OpenAPI header openapi: 3.0.1 @@ -131,29 +159,23 @@ public void generate() throws IOException { .set("paths", NODE_FACTORY.objectNode() .set("/path", NODE_FACTORY.objectNode() .set("get", NODE_FACTORY.objectNode().set("tags", NODE_FACTORY.arrayNode().add("path"))))); - JsonNode combinedSchemaDefinitionsYaml = ((ObjectNode) NODE_FACTORY.objectNode().set("components", - NODE_FACTORY.objectNode().set("schemas", schemasNode))).setAll(yamlHeader); + + JsonNode combinedSchemaDefinitionsYaml = extendedNode.setAll(yamlHeader); final String yaml = new YAMLMapper().writeValueAsString(combinedSchemaDefinitionsYaml) - .replaceAll("definitions", "components/schemas") - .replaceAll("\n\\s+- type: \"null\"", ""); + .replaceAll("definitions", "components/schemas") + .replaceAll("\n\\s+description: null", "") + .replaceAll("\n\\s+- type: \"null\"", ""); - combinedDirectory = Paths.get(outputDirectory + sep + "combined"); - try { - Files.createDirectory(combinedDirectory); - } catch (FileAlreadyExistsException fae) { - // No-op - } Files.write(Paths.get(combinedDirectory + sep + "open-api.yaml"), yaml.getBytes(StandardCharsets.UTF_8), StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); - JsonNode combinedSchemaDefinitionsJson = NODE_FACTORY.objectNode().set("definitions",schemasNode); + JsonNode combinedSchemaDefinitionsJson = NODE_FACTORY.objectNode().set("definitions", extendedNode); String prettySchema = JacksonUtils.prettyPrint(combinedSchemaDefinitionsJson); Files.write(Paths.get(Paths.get(outputDirectory) + sep + "combined" + sep + "schema.json"), prettySchema.getBytes(StandardCharsets.UTF_8), StandardOpenOption.WRITE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); - } private final HashSet filenames = new HashSet<>(); @@ -183,5 +205,4 @@ private void generateSchema(final File file) { throw new RuntimeException(e); } } - } \ No newline at end of file diff --git a/buildSrc/src/main/java/io/datahubproject/OpenApiEntities.java b/buildSrc/src/main/java/io/datahubproject/OpenApiEntities.java new file mode 100644 index 0000000000000..7fbf013384b7d --- /dev/null +++ b/buildSrc/src/main/java/io/datahubproject/OpenApiEntities.java @@ -0,0 +1,603 @@ +package io.datahubproject; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import 
com.fasterxml.jackson.databind.node.ObjectNode;
+import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
+import com.fasterxml.jackson.dataformat.yaml.YAMLMapper;
+import com.linkedin.metadata.models.registry.config.Entities;
+import com.linkedin.metadata.models.registry.config.Entity;
+import org.gradle.internal.Pair;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
+import java.util.*;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+public class OpenApiEntities {
+    private final static String MODEL_VERSION = "_v2";
+    private final static String REQUEST_SUFFIX = "Request" + MODEL_VERSION;
+    private final static String RESPONSE_SUFFIX = "Response" + MODEL_VERSION;
+
+    private final static String ASPECT_REQUEST_SUFFIX = "Aspect" + REQUEST_SUFFIX;
+    private final static String ASPECT_RESPONSE_SUFFIX = "Aspect" + RESPONSE_SUFFIX;
+    private final static String ENTITY_REQUEST_SUFFIX = "Entity" + REQUEST_SUFFIX;
+    private final static String ENTITY_RESPONSE_SUFFIX = "Entity" + RESPONSE_SUFFIX;
+
+    private final JsonNodeFactory NODE_FACTORY;
+    private Map<String, Entity> entityMap;
+    private String entityRegistryYaml;
+    private Path combinedDirectory;
+
+    private final static Set<String> SUPPORTED_ASPECT_PATHS = Set.of(
+            "domains", "ownership", "deprecation", "status", "globalTags", "glossaryTerms", "dataContractInfo",
+            "browsePathsV2"
+    );
+
+    public OpenApiEntities(JsonNodeFactory NODE_FACTORY) {
+        this.NODE_FACTORY = NODE_FACTORY;
+    }
+
+    public String getEntityRegistryYaml() {
+        return entityRegistryYaml;
+    }
+
+    public void setEntityRegistryYaml(String entityRegistryYaml) {
+        this.entityRegistryYaml = entityRegistryYaml;
+        ObjectMapper mapper = new ObjectMapper(new YAMLFactory());
+        mapper.findAndRegisterModules();
+        try {
+            Entities entities = mapper.readValue(Paths.get(entityRegistryYaml).toFile(), Entities.class);
+            entityMap = entities.getEntities().stream()
+                    .filter(e -> "core".equals(e.getCategory()))
+                    .collect(Collectors.toMap(Entity::getName, Function.identity()));
+        } catch (IOException e) {
+            throw new IllegalArgumentException(
+                    String.format("Error while reading entity yaml file in path %s: %s", entityRegistryYaml, e.getMessage()));
+        }
+    }
+
+    public Path getCombinedDirectory() {
+        return combinedDirectory;
+    }
+
+    public void setCombinedDirectory(Path combinedDirectory) {
+        this.combinedDirectory = combinedDirectory;
+    }
+
+    public ObjectNode entityExtension(List<ObjectNode> nodesList, ObjectNode schemasNode) throws IOException {
+        // Generate entities schema
+        Set<String> aspectDefinitions = nodesList.stream()
+                .map(nl -> nl.get("definitions").fieldNames())
+                .flatMap(it -> StreamSupport.stream(Spliterators.spliteratorUnknownSize(it, Spliterator.ORDERED), false))
+                .collect(Collectors.toSet());
+        withWrappedAspects(schemasNode, aspectDefinitions);
+
+        // Add entity schema
+        Set<String> entitySchema = withEntitySchema(schemasNode, aspectDefinitions);
+
+        // Write specific sections: components.* and paths
+        Set<String> modelDefinitions = Stream.concat(aspectDefinitions.stream(), entitySchema.stream())
+                .collect(Collectors.toSet());
+
+        // Just the component & parameters schema
+        Pair<ObjectNode, Set<String>> parameters = buildParameters(schemasNode, modelDefinitions);
+        ObjectNode componentsNode = writeComponentsYaml(schemasNode, parameters.left());
+
+        // Just the entity paths
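+        // writePathsYaml emits open-api-paths.yaml beside the open-api-components.yaml written above (e.g. /dataset and /dataset/{urn} for the dataset entity); only the components node is returned for the combined schema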
writePathsYaml(modelDefinitions, parameters.right()); + + return componentsNode; + } + + private static String toUpperFirst(String s) { + return s.substring(0, 1).toUpperCase() + s.substring(1); + } + + private Set withEntitySchema(ObjectNode schemasNode, Set definitions) { + return entityMap.values().stream() + // Make sure the primary key is defined + .filter(entity -> definitions.contains(toUpperFirst(entity.getKeyAspect()))) + .map(entity -> { + final String upperName = toUpperFirst(entity.getName()); + + ObjectNode entityDefinitions = NODE_FACTORY.objectNode(); + entityDefinitions.set(upperName + ENTITY_RESPONSE_SUFFIX, buildEntitySchema(entity, definitions, true)); + entityDefinitions.set(upperName + ENTITY_REQUEST_SUFFIX, buildEntitySchema(entity, definitions, false)); + entityDefinitions.set("Scroll" + upperName + ENTITY_RESPONSE_SUFFIX, buildEntityScrollSchema(entity)); + + schemasNode.setAll(entityDefinitions); + + return upperName; + }).collect(Collectors.toSet()); + } + + + private Set withWrappedAspects(ObjectNode schemasNode, Set aspects) { + return aspects.stream().peek(aspect -> { + ObjectNode aspectRef = NODE_FACTORY.objectNode() + .put("$ref", "#/definitions/" + aspect); + + ObjectNode responseProperties = NODE_FACTORY.objectNode(); + responseProperties.set("value", aspectRef); + responseProperties.set("systemMetadata", NODE_FACTORY.objectNode() + .put("description", "System metadata for the aspect.") + .put("$ref", "#/definitions/SystemMetadata")); + + ObjectNode responseWrapper = NODE_FACTORY.objectNode() + .put("type", "object") + .put("description", "Aspect wrapper object.") + .set("properties", responseProperties); + responseWrapper.set("required", NODE_FACTORY.arrayNode().add("value")); + schemasNode.set(aspect + ASPECT_RESPONSE_SUFFIX, responseWrapper); + + ObjectNode requestProperties = NODE_FACTORY.objectNode(); + requestProperties.set("value", aspectRef); + + ObjectNode requestWrapper = NODE_FACTORY.objectNode() + .put("type", "object") + .put("description", "Aspect wrapper object.") + .set("properties", requestProperties); + requestWrapper.set("required", NODE_FACTORY.arrayNode().add("value")); + schemasNode.set(aspect + ASPECT_REQUEST_SUFFIX, requestWrapper); + }).collect(Collectors.toSet()); + } + + private ObjectNode buildEntitySchema(Entity entity, Set aspectDefinitions, boolean isResponse) { + ObjectNode propertiesNode = NODE_FACTORY.objectNode(); + + propertiesNode.set("urn", NODE_FACTORY.objectNode() + .put("description", "Unique id for " + entity.getName()) + .put("type", "string")); + + propertiesNode.set(entity.getKeyAspect(), buildAspectRef(entity.getKeyAspect(), isResponse)); + + entity.getAspects().stream() + .filter(aspect -> aspectDefinitions.contains(toUpperFirst(aspect))) // Only if aspect is defined + .forEach(aspect -> propertiesNode.set(aspect, buildAspectRef(aspect, isResponse))); + + ObjectNode entityNode = NODE_FACTORY.objectNode() + .put("type", "object") + .put("description", Optional.ofNullable(entity.getDoc()) + .orElse(toUpperFirst(entity.getName()) + " object.")) + .set("properties", propertiesNode); + entityNode.set("required", NODE_FACTORY.arrayNode().add("urn")); + + return entityNode; + } + + private ObjectNode buildEntityScrollSchema(Entity entity) { + ObjectNode scrollResponsePropertiesNode = NODE_FACTORY.objectNode(); + + scrollResponsePropertiesNode.set("scrollId", NODE_FACTORY.objectNode() + .put("description", "Scroll id for pagination.") + .put("type", "string")); + + scrollResponsePropertiesNode.set("entities", 
NODE_FACTORY.objectNode() + .put("description", Optional.ofNullable(entity.getDoc()) + .orElse(toUpperFirst(entity.getName()) + " object.")) + .put("type", "array") + .set("items", NODE_FACTORY.objectNode().put("$ref", + String.format("#/components/schemas/%s%s", toUpperFirst(entity.getName()), ENTITY_RESPONSE_SUFFIX)))); + + ObjectNode scrollResponseNode = NODE_FACTORY.objectNode() + .put("type", "object") + .put("description", "Scroll across " + toUpperFirst(entity.getName()) + " objects.") + .set("properties", scrollResponsePropertiesNode); + scrollResponseNode.set("required", NODE_FACTORY.arrayNode().add("entities")); + + return scrollResponseNode; + } + + + private ObjectNode buildAspectRef(String aspect, boolean withSystemMetadata) { + if (withSystemMetadata) { + return NODE_FACTORY.objectNode() + .put("$ref", String.format("#/definitions/%s%s", toUpperFirst(aspect), ASPECT_RESPONSE_SUFFIX)); + } else { + return NODE_FACTORY.objectNode() + .put("$ref", String.format("#/definitions/%s%s", toUpperFirst(aspect), ASPECT_REQUEST_SUFFIX)); + } + } + + private Optional> generateEntityParameters(final Entity entity, Set definitions) { + /* + If not missing key + */ + if (definitions.contains(toUpperFirst(entity.getKeyAspect()))) { + final String parameterName = toUpperFirst(entity.getName()) + "Aspects"; + + ArrayNode aspects = NODE_FACTORY.arrayNode(); + entity.getAspects().stream() + .filter(aspect -> definitions.contains(toUpperFirst(aspect))) // Only if aspect is defined + .distinct() + .forEach(aspects::add); + + if (aspects.isEmpty()) { + aspects.add(entity.getKeyAspect()); + } + + ObjectNode itemsNode = NODE_FACTORY.objectNode() + .put("type", "string"); + itemsNode.set("enum", aspects); + itemsNode.set("default", aspects); + + ObjectNode schemaNode = NODE_FACTORY.objectNode() + .put("type", "array") + .set("items", itemsNode); + ObjectNode parameterSchemaNode = NODE_FACTORY.objectNode() + .put("in", "query") + .put("name", "aspects") + .put("explode", true) + .put("description", "Aspects to include in response.") + .set("schema", schemaNode); + + parameterSchemaNode.set("example", aspects); + + ObjectNode parameterNode = NODE_FACTORY.objectNode() + .set(parameterName + MODEL_VERSION, parameterSchemaNode); + + return Optional.of(Pair.of(parameterName, parameterNode)); + } + + return Optional.empty(); + } + + private Pair> buildParameters(ObjectNode schemasNode, Set definitions) { + ObjectNode parametersNode = NODE_FACTORY.objectNode(); + Set parameterDefinitions = entityMap.values().stream() + .flatMap(entity -> generateEntityParameters(entity, definitions).stream()) + .map(entityNode -> { + parametersNode.setAll(entityNode.right()); + return entityNode.left(); + }) + .collect(Collectors.toSet()); + + return Pair.of(extraParameters(parametersNode), parameterDefinitions); + } + + private ObjectNode writeComponentsYaml(ObjectNode schemasNode, ObjectNode parametersNode) throws IOException { + ObjectNode componentsNode = NODE_FACTORY.objectNode(); + componentsNode.set("schemas", schemasNode); + componentsNode.set("parameters", extraParameters(parametersNode)); + ObjectNode componentsDocNode = NODE_FACTORY.objectNode().set("components", componentsNode); + + final String componentsYaml = new YAMLMapper().writeValueAsString(componentsDocNode) + .replaceAll("definitions", "components/schemas") + .replaceAll("\n\\s+description: null", "") + .replaceAll("\n\\s+- type: \"null\"", ""); + Files.write(Paths.get(combinedDirectory + GenerateJsonSchemaTask.sep + "open-api-components.yaml"), + 
componentsYaml.getBytes(StandardCharsets.UTF_8), StandardOpenOption.WRITE, StandardOpenOption.CREATE, + StandardOpenOption.TRUNCATE_EXISTING); + + return componentsDocNode; + } + + private ObjectNode extraParameters(ObjectNode parametersNode) { + parametersNode.set("ScrollId" + MODEL_VERSION, NODE_FACTORY.objectNode() + .put("in", "query") + .put("name", "scrollId") + .put("description", "Scroll pagination token.") + .set("schema", NODE_FACTORY.objectNode() + .put("type", "string"))); + + ArrayNode sortFields = NODE_FACTORY.arrayNode(); + sortFields.add("urn"); + ObjectNode sortFieldsNode = NODE_FACTORY.objectNode() + .put("type", "string"); + sortFieldsNode.set("enum", sortFields); + sortFieldsNode.set("default", sortFields.get(0)); + + ObjectNode sortFieldsSchemaNode = NODE_FACTORY.objectNode() + .put("type", "array") + .put("default", "urn") + .set("items", sortFieldsNode); + parametersNode.set("SortBy" + MODEL_VERSION, NODE_FACTORY.objectNode() + .put("in", "query") + .put("name", "sort") + .put("explode", true) + .put("description", "Sort fields for pagination.") + .put("example", "urn") + .set("schema", sortFieldsSchemaNode)); + + parametersNode.set("SortOrder" + MODEL_VERSION, NODE_FACTORY.objectNode() + .put("in", "query") + .put("name", "sortOrder") + .put("explode", true) + .put("description", "Sort direction field for pagination.") + .put("example", "ASCENDING") + .set("schema", NODE_FACTORY.objectNode() + .put("default", "ASCENDING") + .put("$ref", "#/components/schemas/SortOrder"))); + + parametersNode.set("PaginationCount" + MODEL_VERSION, NODE_FACTORY.objectNode() + .put("in", "query") + .put("name", "count") + .put("description", "Number of items per page.") + .put("example", "10") + .set("schema", NODE_FACTORY.objectNode() + .put("type", "integer") + .put("default", 10) + .put("minimum", 1))); + parametersNode.set("ScrollQuery" + MODEL_VERSION, NODE_FACTORY.objectNode() + .put("in", "query") + .put("name", "query") + .put("description", "Structured search query.") + .put("example", "*") + .set("schema", NODE_FACTORY.objectNode() + .put("type", "string") + .put("default", "*"))); + + return parametersNode; + } + + private void writePathsYaml(Set modelDefinitions, Set parameterDefinitions) throws IOException { + ObjectNode pathsNode = NODE_FACTORY.objectNode(); + + entityMap.values().stream() + .filter(e -> modelDefinitions.contains(toUpperFirst(e.getName()))) + .forEach(entity -> { + + pathsNode.set(String.format("/%s", entity.getName().toLowerCase()), + buildListEntityPath(entity, parameterDefinitions)); + + pathsNode.set(String.format("/%s/{urn}", entity.getName().toLowerCase()), + buildSingleEntityPath(entity, parameterDefinitions)); + + }); + + buildEntityAspectPaths(pathsNode, modelDefinitions); + + ObjectNode pathsDocNode = NODE_FACTORY.objectNode().set("paths", pathsNode); + + final String componentsYaml = new YAMLMapper().writeValueAsString(pathsDocNode) + .replaceAll("\n\\s+- type: \"null\"", "") + .replaceAll("\n\\s+description: null", ""); + Files.write(Paths.get(combinedDirectory + GenerateJsonSchemaTask.sep + "open-api-paths.yaml"), + componentsYaml.getBytes(StandardCharsets.UTF_8), StandardOpenOption.WRITE, StandardOpenOption.CREATE, + StandardOpenOption.TRUNCATE_EXISTING); + } + + private void buildEntityAspectPaths(ObjectNode pathsNode, Set modelDefinitions) { + entityMap.values().stream() + .filter(e -> modelDefinitions.contains(toUpperFirst(e.getName()))) + .forEach(entity -> { + entity.getAspects().stream() + .filter(aspect -> 
SUPPORTED_ASPECT_PATHS.contains(aspect)) + .filter(aspect -> modelDefinitions.contains(toUpperFirst(aspect))) + .forEach(aspect -> pathsNode.set(String.format("/%s/{urn}/%s", + entity.getName().toLowerCase(), aspect.toLowerCase()), + buildSingleEntityAspectPath(entity, aspect))); + }); + } + + private ObjectNode buildListEntityPath(Entity entity, Set parameterDefinitions) { + final String upperFirst = toUpperFirst(entity.getName()); + final String aspectParameterName = upperFirst + "Aspects"; + ArrayNode tagsNode = NODE_FACTORY.arrayNode() + .add(entity.getName() + " Entity"); + + ObjectNode scrollMethod = NODE_FACTORY.objectNode() + .put("summary", String.format("Scroll %s.", upperFirst)) + .put("operationId", String.format("scroll", upperFirst)); + + ArrayNode scrollPathParametersNode = NODE_FACTORY.arrayNode(); + scrollMethod.set("parameters", scrollPathParametersNode); + scrollPathParametersNode.add(NODE_FACTORY.objectNode() + .put("in", "query") + .put("name", "systemMetadata") + .put("description", "Include systemMetadata with response.") + .set("schema", NODE_FACTORY.objectNode() + .put("type", "boolean") + .put("default", false))); + if (parameterDefinitions.contains(aspectParameterName)) { + scrollPathParametersNode.add(NODE_FACTORY.objectNode() + .put("$ref", String.format("#/components/parameters/%s", aspectParameterName + MODEL_VERSION))); + } + scrollPathParametersNode.add(NODE_FACTORY.objectNode() + .put("$ref", "#/components/parameters/PaginationCount" + MODEL_VERSION)); + scrollPathParametersNode.add(NODE_FACTORY.objectNode() + .put("$ref", "#/components/parameters/ScrollId" + MODEL_VERSION)); + scrollPathParametersNode.add(NODE_FACTORY.objectNode() + .put("$ref", "#/components/parameters/SortBy" + MODEL_VERSION)); + scrollPathParametersNode.add(NODE_FACTORY.objectNode() + .put("$ref", "#/components/parameters/SortOrder" + MODEL_VERSION)); + scrollPathParametersNode.add(NODE_FACTORY.objectNode() + .put("$ref", "#/components/parameters/ScrollQuery" + MODEL_VERSION)); + scrollMethod.set("parameters", scrollPathParametersNode); + scrollMethod.set("responses", NODE_FACTORY.objectNode() + .set("200", NODE_FACTORY.objectNode().put("description", "Success") + .set("content", NODE_FACTORY.objectNode().set("application/json", NODE_FACTORY.objectNode() + .set("schema", NODE_FACTORY.objectNode() + .put("$ref", String.format("#/components/schemas/Scroll%s%s", upperFirst, ENTITY_RESPONSE_SUFFIX))))))); + scrollMethod.set("tags", tagsNode); + + ObjectNode postMethod = NODE_FACTORY.objectNode() + .put("summary", "Create " + upperFirst) + .put("operationId", String.format("create", upperFirst)); + postMethod.set("requestBody", NODE_FACTORY.objectNode() + .put("description", "Create " + entity.getName() + " entities.") + .put("required", true) + .set("content", NODE_FACTORY.objectNode().set("application/json", NODE_FACTORY.objectNode() + .set("schema", NODE_FACTORY.objectNode().put("type", "array") + .set("items", NODE_FACTORY.objectNode().put("$ref", + String.format("#/components/schemas/%s%s", upperFirst, ENTITY_REQUEST_SUFFIX))))))); + postMethod.set("responses", NODE_FACTORY.objectNode() + .set("201", NODE_FACTORY.objectNode().put("description", "Create " + entity.getName() + " entities.") + .set("content", NODE_FACTORY.objectNode().set("application/json", NODE_FACTORY.objectNode() + .set("schema", NODE_FACTORY.objectNode().put("type", "array") + .set("items", NODE_FACTORY.objectNode().put("$ref", + String.format("#/components/schemas/%s%s", upperFirst, ENTITY_RESPONSE_SUFFIX)))))))); + 
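+        // Together these expose GET (scroll, with the aspects/count/scrollId/sort/query parameters above) and POST (create from an array of <entity>EntityRequest_v2, returning <entity>EntityResponse_v2 objects) on the list path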
postMethod.set("tags", tagsNode); + + ObjectNode listMethods = NODE_FACTORY.objectNode(); + listMethods.set("get", scrollMethod); + listMethods.set("post", postMethod); + + return listMethods; + } + + private ObjectNode buildSingleEntityPath(Entity entity, Set parameterDefinitions) { + final String upperFirst = toUpperFirst(entity.getName()); + final String aspectParameterName = upperFirst + "Aspects"; + ArrayNode tagsNode = NODE_FACTORY.arrayNode().add(entity.getName() + " Entity"); + + ObjectNode getMethod = NODE_FACTORY.objectNode() + .put("summary", String.format("Get %s by key.", entity.getName())) + .put("operationId", String.format("get", upperFirst)); + getMethod.set("tags", tagsNode); + ArrayNode singlePathParametersNode = NODE_FACTORY.arrayNode(); + getMethod.set("parameters", singlePathParametersNode); + singlePathParametersNode.add(NODE_FACTORY.objectNode() + .put("in", "query") + .put("name", "systemMetadata") + .put("description", "Include systemMetadata with response.") + .set("schema", NODE_FACTORY.objectNode() + .put("type", "boolean") + .put("default", false))); + if(parameterDefinitions.contains(aspectParameterName)) { + singlePathParametersNode.add(NODE_FACTORY.objectNode() + .put("$ref", String.format("#/components/parameters/%s", aspectParameterName + MODEL_VERSION))); + } + + ObjectNode responses = NODE_FACTORY.objectNode(); + getMethod.set("responses", responses); + responses.set("200", NODE_FACTORY.objectNode().put("description", "Success") + .set("content", NODE_FACTORY.objectNode().set("application/json", NODE_FACTORY.objectNode() + .set("schema", NODE_FACTORY.objectNode().put("$ref", + String.format("#/components/schemas/%s%s", upperFirst, ENTITY_RESPONSE_SUFFIX)))))); + responses.set("404", NODE_FACTORY.objectNode() + .put("description", "Not Found") + .set("content", NODE_FACTORY.objectNode() + .set("application/json", NODE_FACTORY.objectNode() + .set("schema", NODE_FACTORY.objectNode())))); + + ObjectNode headResponses = NODE_FACTORY.objectNode(); + headResponses.set("204", NODE_FACTORY.objectNode() + .put("description", entity.getName() + " exists.") + .set("content", NODE_FACTORY.objectNode() + .set("application/json", NODE_FACTORY.objectNode()))); + headResponses.set("404", NODE_FACTORY.objectNode() + .put("description", entity.getName() + " does not exist.") + .set("content", NODE_FACTORY.objectNode() + .set("application/json", NODE_FACTORY.objectNode()))); + ObjectNode headMethod = NODE_FACTORY.objectNode() + .put("summary", upperFirst + " existence.") + .put("operationId", String.format("head", upperFirst)) + .set("responses", headResponses); + headMethod.set("tags", tagsNode); + + ObjectNode deleteMethod = NODE_FACTORY.objectNode() + .put("summary", "Delete entity " + upperFirst) + .put("operationId", String.format("delete", upperFirst)) + .set("responses", NODE_FACTORY.objectNode() + .set("200", NODE_FACTORY.objectNode() + .put("description", "Delete " + entity.getName() + " entity.") + .set("content", NODE_FACTORY.objectNode() + .set("application/json", NODE_FACTORY.objectNode())))); + deleteMethod.set("tags", tagsNode); + + ObjectNode singlePathMethods = NODE_FACTORY.objectNode() + .set("parameters", NODE_FACTORY.arrayNode() + .add(NODE_FACTORY.objectNode() + .put("in", "path") + .put("name", "urn") + .put("required", true) + .set("schema", NODE_FACTORY.objectNode().put("type", "string")))); + singlePathMethods.set("get", getMethod); + singlePathMethods.set("head", headMethod); + singlePathMethods.set("delete", deleteMethod); + + return 
singlePathMethods; + } + + private ObjectNode buildSingleEntityAspectPath(Entity entity, String aspect) { + final String upperFirstEntity = toUpperFirst(entity.getName()); + final String upperFirstAspect = toUpperFirst(aspect); + + ArrayNode tagsNode = NODE_FACTORY.arrayNode() + .add(aspect + " Aspect"); + + ObjectNode getMethod = NODE_FACTORY.objectNode() + .put("summary", String.format("Get %s for %s.", aspect, entity.getName())) + .put("operationId", String.format("get%s", upperFirstAspect, upperFirstEntity)); + getMethod.set("tags", tagsNode); + ArrayNode singlePathParametersNode = NODE_FACTORY.arrayNode(); + getMethod.set("parameters", singlePathParametersNode); + singlePathParametersNode.add(NODE_FACTORY.objectNode() + .put("in", "query") + .put("name", "systemMetadata") + .put("description", "Include systemMetadata with response.") + .set("schema", NODE_FACTORY.objectNode() + .put("type", "boolean") + .put("default", false))); + getMethod.set("responses", NODE_FACTORY.objectNode().set("200", NODE_FACTORY.objectNode() + .put("description", "Success").set("content", NODE_FACTORY.objectNode() + .set("application/json", NODE_FACTORY.objectNode().set("schema", NODE_FACTORY.objectNode() + .put("$ref", + String.format("#/components/schemas/%s%s", upperFirstAspect, ASPECT_RESPONSE_SUFFIX))))))); + + ObjectNode headResponses = NODE_FACTORY.objectNode(); + headResponses.set("200", NODE_FACTORY.objectNode() + .put("description", String.format("%s on %s exists.", aspect, entity.getName())) + .set("content", NODE_FACTORY.objectNode() + .set("application/json", NODE_FACTORY.objectNode()))); + headResponses.set("404", NODE_FACTORY.objectNode() + .put("description", String.format("%s on %s does not exist.", aspect, entity.getName())) + .set("content", NODE_FACTORY.objectNode() + .set("application/json", NODE_FACTORY.objectNode()))); + ObjectNode headMethod = NODE_FACTORY.objectNode() + .put("summary", String.format("%s on %s existence.", aspect, upperFirstEntity)) + .put("operationId", String.format("head%s", upperFirstAspect, upperFirstEntity)) + .set("responses", headResponses); + headMethod.set("tags", tagsNode); + + ObjectNode deleteMethod = NODE_FACTORY.objectNode() + .put("summary", String.format("Delete %s on entity %s", aspect, upperFirstEntity)) + .put("operationId", String.format("delete%s", upperFirstAspect, upperFirstEntity)) + .set("responses", NODE_FACTORY.objectNode() + .set("200", NODE_FACTORY.objectNode() + .put("description", String.format("Delete %s on %s entity.", aspect, upperFirstEntity)) + .set("content", NODE_FACTORY.objectNode() + .set("application/json", NODE_FACTORY.objectNode())))); + deleteMethod.set("tags", tagsNode); + + ObjectNode postMethod = NODE_FACTORY.objectNode() + .put("summary", String.format("Create aspect %s on %s ", aspect, upperFirstEntity)) + .put("operationId", String.format("create%s", upperFirstAspect, upperFirstEntity)); + postMethod.set("requestBody", NODE_FACTORY.objectNode() + .put("description", String.format("Create aspect %s on %s entity.", aspect, upperFirstEntity)) + .put("required", true).set("content", NODE_FACTORY.objectNode() + .set("application/json", NODE_FACTORY.objectNode().set("schema", NODE_FACTORY.objectNode() + .put("$ref", + String.format("#/components/schemas/%s%s", upperFirstAspect, ASPECT_REQUEST_SUFFIX)))))); + postMethod.set("responses", NODE_FACTORY.objectNode().set("201", NODE_FACTORY.objectNode() + .put("description", String.format("Create aspect %s on %s entity.", aspect, upperFirstEntity)) + .set("content", 
NODE_FACTORY.objectNode().set("application/json", NODE_FACTORY.objectNode() + .set("schema", NODE_FACTORY.objectNode().put("$ref", + String.format("#/components/schemas/%s%s", upperFirstAspect, ASPECT_RESPONSE_SUFFIX))))))); + postMethod.set("tags", tagsNode); + + ObjectNode singlePathMethods = NODE_FACTORY.objectNode() + .set("parameters", NODE_FACTORY.arrayNode() + .add(NODE_FACTORY.objectNode() + .put("in", "path") + .put("name", "urn") + .put("required", true) + .set("schema", NODE_FACTORY.objectNode().put("type", "string")))); + singlePathMethods.set("get", getMethod); + singlePathMethods.set("head", headMethod); + singlePathMethods.set("delete", deleteMethod); + singlePathMethods.set("post", postMethod); + + return singlePathMethods; + } +} diff --git a/datahub-frontend/conf/routes b/datahub-frontend/conf/routes index 38e6f769027f0..3102c26497fed 100644 --- a/datahub-frontend/conf/routes +++ b/datahub-frontend/conf/routes @@ -33,6 +33,8 @@ GET /openapi/*path c POST /openapi/*path controllers.Application.proxy(path: String, request: Request) DELETE /openapi/*path controllers.Application.proxy(path: String, request: Request) PUT /openapi/*path controllers.Application.proxy(path: String, request: Request) +HEAD /openapi/*path controllers.Application.proxy(path: String, request: Request) +PATCH /openapi/*path controllers.Application.proxy(path: String, request: Request) # Map static resources from the /public folder to the /assets URL path GET /assets/*file controllers.Assets.at(path="/public", file) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java index 40e0432f9ed39..4867aa1d89825 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java @@ -145,6 +145,9 @@ private void mapDatasetProperties(@Nonnull Dataset dataset, @Nonnull DataMap dat properties.setQualifiedName(gmsProperties.getQualifiedName()); dataset.setProperties(properties); dataset.setDescription(properties.getDescription()); + if (gmsProperties.getUri() != null) { + dataset.setUri(gmsProperties.getUri().toString()); + } TimeStamp created = gmsProperties.getCreated(); if (created != null) { properties.setCreated(created.getTime()); diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/Entity.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/Entity.java index c446f63b65321..f32aa1aa8bd47 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/Entity.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/Entity.java @@ -1,12 +1,14 @@ package com.linkedin.metadata.models.registry.config; import java.util.List; + import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.NoArgsConstructor; import lombok.Value; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import javax.annotation.Nullable; @Value @@ -18,4 +20,7 @@ public class Entity { String doc; String keyAspect; List aspects; + + @Nullable + String category; } diff --git a/li-utils/build.gradle b/li-utils/build.gradle index e8b672a3a21fa..8f526cffba094 100644 --- a/li-utils/build.gradle +++ b/li-utils/build.gradle @@ -36,5 +36,5 @@ idea { } } -// Need to compile backing java 
definitions with the data template. +// Need to compile backing java parameterDefinitions with the data template. sourceSets.mainGeneratedDataTemplate.java.srcDirs('src/main/javaPegasus/') \ No newline at end of file diff --git a/metadata-auth/auth-api/build.gradle b/metadata-auth/auth-api/build.gradle index 2bf9e5243e152..7159aa5f15e61 100644 --- a/metadata-auth/auth-api/build.gradle +++ b/metadata-auth/auth-api/build.gradle @@ -31,7 +31,7 @@ dependencies() { api project(path: ':metadata-utils') implementation externalDependency.guava - implementation externalDependency.lombok + compileOnly externalDependency.lombok annotationProcessor externalDependency.lombok diff --git a/metadata-integration/java/datahub-client/build.gradle b/metadata-integration/java/datahub-client/build.gradle index e304bb5329c62..fc72fc4257491 100644 --- a/metadata-integration/java/datahub-client/build.gradle +++ b/metadata-integration/java/datahub-client/build.gradle @@ -56,7 +56,7 @@ dependencies { testImplementation externalDependency.httpAsyncClient testRuntimeOnly externalDependency.logbackClassic - swaggerCodegen 'io.swagger.codegen.v3:swagger-codegen-cli:3.0.33' + swaggerCodegen externalDependency.swaggerCli } task copyAvroSchemas { diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index d2b584ceb6745..a2c643516dce6 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -43,7 +43,6 @@ dependencies { implementation externalDependency.resilience4j api externalDependency.springContext implementation externalDependency.swaggerAnnotations - swaggerCodegen 'io.swagger.codegen.v3:swagger-codegen-cli:3.0.33' implementation(externalDependency.mixpanel) { exclude group: 'org.json', module: 'json' } @@ -121,33 +120,6 @@ project.compileJava { } } -tasks.register('generateOpenApiPojos', GenerateSwaggerCode) { - it.setInputFile( - file( - "${project(':metadata-models').projectDir}/src/generatedJsonSchema/combined/open-api.yaml" - ) - ) - it.setOutputDir(file("$projectDir/generated")) - it.setLanguage("spring") - it.setComponents(['models']) - it.setTemplateDir(file("$projectDir/src/main/resources/JavaSpring")) - it.setAdditionalProperties([ - "group-id" : "io.datahubproject", - "dateLibrary" : "java8", - "java11" : "true", - "modelPropertyNaming" : "original", - "modelPackage" : "io.datahubproject.openapi.generated"] as Map) - - dependsOn ':metadata-models:generateJsonSchema' -} - -compileJava.dependsOn generateOpenApiPojos -processResources.dependsOn generateOpenApiPojos -sourceSets.main.java.srcDir "${generateOpenApiPojos.outputDir}/src/main/java" -sourceSets.main.resources.srcDir "${generateOpenApiPojos.outputDir}/src/main/resources" - -checkstyleMain.exclude '**/generated/**' - clean { project.delete("$projectDir/generated") } \ No newline at end of file diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 2cacdc7d38fc6..66188473b9d03 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -1149,12 +1149,13 @@ public void ingestEntities(@Nonnull final List entities, @Nonnull final } @Override - public void ingestEntity(Entity entity, AuditStamp auditStamp) { + public SystemMetadata ingestEntity(Entity entity, AuditStamp auditStamp) { SystemMetadata generatedSystemMetadata = new SystemMetadata(); generatedSystemMetadata.setRunId(DEFAULT_RUN_ID); 
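    // Default system metadata carries the run id and observation time; the new signature hands it back to the caller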
generatedSystemMetadata.setLastObserved(System.currentTimeMillis()); ingestEntity(entity, auditStamp, generatedSystemMetadata); + return generatedSystemMetadata; } @Override @@ -1543,6 +1544,13 @@ public Boolean isSoftDeleted(@Nonnull final Urn urn) { return statusAspect != null && ((Status) statusAspect).isRemoved(); } + @Override + public Boolean exists(Urn urn, String aspectName) { + EntityAspectIdentifier dbKey = new EntityAspectIdentifier(urn.toString(), aspectName, ASPECT_LATEST_VERSION); + Map aspects = _aspectDao.batchGet(Set.of(dbKey)); + return aspects.values().stream().anyMatch(Objects::nonNull); + } + @Nullable @Override public RollbackResult deleteAspect(String urn, String aspectName, @Nonnull Map conditions, boolean hardDelete) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java index a045bb357d5f0..94b8d57efcc16 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchService.java @@ -174,7 +174,7 @@ private List getEntitiesToSearch(@Nonnull List inputEntities) { */ @Nonnull public ScrollResult scrollAcrossEntities(@Nonnull List entities, @Nonnull String input, - @Nullable Filter postFilters, @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, + @Nullable Filter postFilters, @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size, @Nullable SearchFlags searchFlags) { log.debug(String.format( "Searching Search documents entities: %s, input: %s, postFilters: %s, sortCriterion: %s, from: %s, size: %s", diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java index f698e28c0be6d..13a7d16b723a7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java @@ -127,7 +127,7 @@ public ScrollResult scroll( @Nullable Filter filters, @Nullable SortCriterion sortCriterion, @Nullable String scrollId, - @Nonnull String keepAlive, + @Nullable String keepAlive, int size, @Nullable SearchFlags flags) { return getCachedScrollResults(entities, query, filters, sortCriterion, scrollId, keepAlive, size, flags); @@ -238,7 +238,7 @@ public ScrollResult getCachedScrollResults( @Nullable Filter filters, @Nullable SortCriterion sortCriterion, @Nullable String scrollId, - @Nonnull String keepAlive, + @Nullable String keepAlive, int size, @Nullable SearchFlags flags) { try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "getCachedScrollResults").time()) { @@ -326,7 +326,7 @@ private ScrollResult getRawScrollResults( final Filter filters, final SortCriterion sortCriterion, @Nullable final String scrollId, - @Nonnull final String keepAlive, + @Nullable final String keepAlive, final int count, final boolean fulltext) { if (fulltext) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java index ce7b44c715d6b..32adce458770d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java @@ -27,6 +27,7 @@ import com.linkedin.metadata.shared.ElasticSearchIndexed; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.elasticsearch.action.search.SearchResponse; @Slf4j @@ -174,7 +175,7 @@ public List getBrowsePaths(@Nonnull String entityName, @Nonnull Urn urn) @Nonnull @Override public ScrollResult fullTextScroll(@Nonnull List<String> entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size) { + @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size) { log.debug(String.format( "Scrolling FullText Search documents entities: %s, input: %s, postFilters: %s, sortCriterion: %s, scrollId: %s, size: %s", entities, input, postFilters, sortCriterion, scrollId, size)); @@ -185,7 +186,7 @@ public ScrollResult fullTextScroll(@Nonnull List entities, @Nonnull Stri @Nonnull @Override public ScrollResult structuredScroll(@Nonnull List<String> entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size) { + @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size) { log.debug(String.format( "Scrolling Structured Search documents entities: %s, input: %s, postFilters: %s, sortCriterion: %s, scrollId: %s, size: %s", entities, input, postFilters, sortCriterion, scrollId, size)); @@ -193,6 +194,10 @@ public ScrollResult structuredScroll(@Nonnull List entities, @Nonnull St new SearchFlags().setFulltext(false)); } + public Optional<SearchResponse> raw(@Nonnull String indexName, @Nullable String jsonQuery) { + return esSearchDAO.raw(indexName, jsonQuery); + } + @Override public int maxResultSize() { return ESUtils.MAX_RESULT_SIZE;
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index e204cb6fd6fbe..f3864d99ba5e9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -26,8 +26,10 @@ import io.opentelemetry.extension.annotations.WithSpan; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -40,6 +42,13 @@ import org.elasticsearch.client.Response; import org.elasticsearch.client.RestHighLevelClient; import org.elasticsearch.client.core.CountRequest; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.search.SearchModule; +import org.elasticsearch.search.builder.SearchSourceBuilder; import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.models.registry.template.util.TemplateUtil.*; @@ -52,6 +61,11 @@ @Slf4j @RequiredArgsConstructor public class ESSearchDAO { + private static final NamedXContentRegistry
X_CONTENT_REGISTRY; + static { + SearchModule searchModule = new SearchModule(Settings.EMPTY, false, Collections.emptyList()); + X_CONTENT_REGISTRY = new NamedXContentRegistry(searchModule.getNamedXContents()); + } private final EntityRegistry entityRegistry; private final RestHighLevelClient client; @@ -285,7 +299,7 @@ public Map aggregateByValue(@Nullable String entityName, @Nonnull */ @Nonnull public ScrollResult scroll(@Nonnull List<String> entities, @Nonnull String input, @Nullable Filter postFilters, - @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nonnull String keepAlive, int size, SearchFlags searchFlags) { + @Nullable SortCriterion sortCriterion, @Nullable String scrollId, @Nullable String keepAlive, int size, SearchFlags searchFlags) { final String finalInput = input.isEmpty() ? "*" : input; String[] indexArray = entities.stream() .map(indexConvention::getEntityIndexName) @@ -302,11 +316,11 @@ public ScrollResult scroll(@Nonnull List entities, @Nonnull String input if (supportsPointInTime()) { if (System.currentTimeMillis() + 10000 <= searchAfterWrapper.getExpirationTime()) { pitId = searchAfterWrapper.getPitId(); - } else { + } else if (keepAlive != null) { pitId = createPointInTime(indexArray, keepAlive); } } - } else if (supportsPointInTime()) { + } else if (supportsPointInTime() && keepAlive != null) { pitId = createPointInTime(indexArray, keepAlive); } @@ -326,6 +340,23 @@ public ScrollResult scroll(@Nonnull List entities, @Nonnull String input return executeAndExtract(entitySpecs, searchRequest, transformedFilters, scrollId, keepAlive, size); } + public Optional<SearchResponse> raw(@Nonnull String indexName, @Nullable String jsonQuery) { + return Optional.ofNullable(jsonQuery).map(json -> { + try { + XContentParser parser = XContentType.JSON.xContent().createParser(X_CONTENT_REGISTRY, + LoggingDeprecationHandler.INSTANCE, json); + SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder.fromXContent(parser); + + SearchRequest searchRequest = new SearchRequest(indexConvention.getIndexName(indexName)); + searchRequest.source(searchSourceBuilder); + + return client.search(searchRequest, RequestOptions.DEFAULT); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + private boolean supportsPointInTime() { return pointInTimeCreationEnabled && ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH.equalsIgnoreCase(elasticSearchImplementation); }
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 5973f77da28aa..dbd933d59d7f3 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -228,7 +228,7 @@ public SearchRequest getSearchRequest(@Nonnull String input, @Nullable Filter fi @Nonnull @WithSpan public SearchRequest getSearchRequest(@Nonnull String input, @Nullable Filter filter, - @Nullable SortCriterion sortCriterion, @Nullable Object[] sort, @Nullable String pitId, @Nonnull String keepAlive, + @Nullable SortCriterion sortCriterion, @Nullable Object[] sort, @Nullable String pitId, @Nullable String keepAlive, int size, SearchFlags searchFlags) { SearchRequest searchRequest = new PITAwareSearchRequest(); SearchFlags finalSearchFlags = applyDefaultSearchFlags(searchFlags, input,
DEFAULT_SERVICE_SEARCH_FLAGS); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index 741eb5568d2ea..12c081a5c25a6 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -261,11 +261,11 @@ public static String extractTargetIndex(String id) { } public static void setSearchAfter(SearchSourceBuilder searchSourceBuilder, @Nullable Object[] sort, - @Nullable String pitId, String keepAlive) { + @Nullable String pitId, @Nullable String keepAlive) { if (sort != null && sort.length > 0) { searchSourceBuilder.searchAfter(sort); } - if (StringUtils.isNotBlank(pitId)) { + if (StringUtils.isNotBlank(pitId) && keepAlive != null) { PointInTimeBuilder pointInTimeBuilder = new PointInTimeBuilder(pitId); pointInTimeBuilder.setKeepAlive(TimeValue.parseTimeValue(keepAlive, "keepAlive")); searchSourceBuilder.pointInTimeBuilder(pointInTimeBuilder); diff --git a/metadata-models/build.gradle b/metadata-models/build.gradle index 2e8efae9b7bce..db01be3ccebdf 100644 --- a/metadata-models/build.gradle +++ b/metadata-models/build.gradle @@ -2,6 +2,7 @@ import io.datahubproject.GenerateJsonSchemaTask apply plugin: 'java-library' apply plugin: 'pegasus' +apply plugin: 'org.hidetake.swagger.generator' tasks.withType(JavaCompile).configureEach { javaCompiler = javaToolchains.compilerFor { @@ -24,9 +25,25 @@ dependencies { api project(':li-utils') dataModel project(':li-utils') + compileOnly externalDependency.lombok + annotationProcessor externalDependency.lombok + compileOnly externalDependency.swaggerAnnotations + compileOnly externalDependency.springBootStarterValidation + compileOnly externalDependency.jacksonCore + compileOnly externalDependency.jacksonDataBind + + swaggerCodegen externalDependency.swaggerCli testImplementation externalDependency.guava } +sourceSets { + main { + java { + srcDirs = ["$buildDir/openapi/generated/src/main/java"] + } + } +} + mainAvroSchemaJar.dependsOn generateAvroSchema pegasus.main.generationModes = [PegasusGenerationMode.PEGASUS, PegasusGenerationMode.AVRO] @@ -35,9 +52,32 @@ pegasus.main.generationModes = [PegasusGenerationMode.PEGASUS, PegasusGeneration tasks.register('generateJsonSchema', GenerateJsonSchemaTask) { it.setInputDirectory("$projectDir/src/mainGeneratedAvroSchema") it.setOutputDirectory("$projectDir/src/generatedJsonSchema") + it.setEntityRegistryYaml("${project(':metadata-models').projectDir}/src/main/resources/entity-registry.yml") dependsOn generateAvroSchema } -clean { - project.delete("$projectDir/src/generatedJsonSchema") -} \ No newline at end of file +// https://github.com/int128/gradle-swagger-generator-plugin#task-type-generateswaggercode +task openApiGenerate(type: GenerateSwaggerCode, dependsOn: 'generateJsonSchema') { + inputFile = file("$projectDir/src/generatedJsonSchema/combined/open-api.yaml") + outputDir = file("$buildDir/openapi/generated") + language = "spring" + components = ["models"] + templateDir = file("$projectDir/src/main/resources/JavaSpring") + additionalProperties = [ + 'group-id' : "io.datahubproject", + 'dateLibrary' : "java8", + 'java11' : "true", + 'modelPropertyNaming': "original", + 'modelPackage' : "io.datahubproject.openapi.generated" + ] +} +tasks.getByName("compileJava").dependsOn(openApiGenerate) + +checkstyleMain.exclude '**/generated/**' + +task cleanExtraDirs { + delete 
"$projectDir/src/generatedJsonSchema" +} +clean.finalizedBy(cleanExtraDirs) + +checkstyleMain.exclude '**/generated/**' diff --git a/metadata-models/src/main/resources/JavaSpring/interface.mustache b/metadata-models/src/main/resources/JavaSpring/interface.mustache new file mode 100644 index 0000000000000..b09d4f3a66cc0 --- /dev/null +++ b/metadata-models/src/main/resources/JavaSpring/interface.mustache @@ -0,0 +1,25 @@ +{{#jackson}} +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +{{/jackson}} +/** +* {{#description}}{{.}}{{/description}}{{^description}}{{classname}}{{/description}} +*/ +{{#jackson}} +@JsonTypeInfo( + use = JsonTypeInfo.Id.NAME, + include = JsonTypeInfo.As.PROPERTY, + property = "__type") +@JsonSubTypes({ + {{#subTypes}} + @JsonSubTypes.Type(value = {{classname}}.class, name = "{{classname}}"){{^@last}},{{/@last}} + {{/subTypes}} +}) +{{/jackson}} +public interface {{{classname}}} { +{{#vendorExtensions}} +{{#x-discriminator-type-getter}} + {{x-discriminator-type}} {{x-discriminator-type-getter}}(); +{{/x-discriminator-type-getter}} +{{/vendorExtensions}} +} \ No newline at end of file diff --git a/metadata-models/src/main/resources/JavaSpring/model.mustache b/metadata-models/src/main/resources/JavaSpring/model.mustache new file mode 100644 index 0000000000000..a048f249a6b3d --- /dev/null +++ b/metadata-models/src/main/resources/JavaSpring/model.mustache @@ -0,0 +1,41 @@ +package {{package}}; + +{{^x-is-composed-model}} +import java.util.Objects; +{{#imports}}import {{import}}; +{{/imports}} +{{#serializableModel}} +import java.io.Serializable; +{{/serializableModel}} +{{#useBeanValidation}} +import org.springframework.validation.annotation.Validated; +import javax.validation.Valid; +import com.fasterxml.jackson.annotation.JsonInclude; +import javax.validation.constraints.*; +{{/useBeanValidation}} +{{#jackson}} +{{#withXml}} +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlRootElement; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty; +{{/withXml}} +{{/jackson}} +{{#withXml}} +import javax.xml.bind.annotation.*; +{{/withXml}} +{{/x-is-composed-model}} + +{{#models}} +{{#model}} +{{#isComposedModel}} +{{>interface}} +{{/isComposedModel}} +{{^isComposedModel}} +{{#isEnum}} +{{>enumOuterClass}} +{{/isEnum}} +{{^isEnum}} +{{>pojo}} +{{/isEnum}} +{{/isComposedModel}} +{{/model}} +{{/models}} \ No newline at end of file diff --git a/metadata-models/src/main/resources/JavaSpring/pojo.mustache b/metadata-models/src/main/resources/JavaSpring/pojo.mustache new file mode 100644 index 0000000000000..e0b6f3e7a1a6f --- /dev/null +++ b/metadata-models/src/main/resources/JavaSpring/pojo.mustache @@ -0,0 +1,177 @@ +{{#if hasVars}} +{{else}} +import com.fasterxml.jackson.annotation.JsonProperty; +import io.swagger.v3.oas.annotations.media.Schema; +{{/if}} +import lombok.Builder; +import lombok.Getter; +import lombok.extern.jackson.Jacksonized; +{{#if interfaceModels}} +import com.fasterxml.jackson.annotation.JsonTypeInfo; +{{/if}} +/** + * {{#description}}{{.}}{{/description}}{{^description}}{{classname}}{{/description}} + */{{#description}} +{{#useOas2}}@ApiModel{{/useOas2}}{{^useOas2}}@Schema{{/useOas2}}(description = "{{{description}}}"){{/description}} +{{#useBeanValidation}}@Validated{{/useBeanValidation}} +{{>generatedAnnotation}}{{#discriminator}}{{>typeInfoAnnotation}}{{/discriminator}}{{>xmlAnnotation}} +@Jacksonized @Builder(toBuilder = true) +{{#if interfaceModels}} 
+@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.EXISTING_PROPERTY, property = "__type") +{{/if}} +@JsonInclude(JsonInclude.Include.NON_NULL) +public class {{classname}} {{#parent}}extends {{{parent}}}{{/parent}} {{#serializableModel}}implements Serializable {{#interfaceModels}}, {{classname}}{{^@last}}, {{/@last}}{{#@last}} {{/@last}}{{/interfaceModels}}{{/serializableModel}}{{^serializableModel}}{{#interfaceModels}}{{#@first}}implements {{/@first}}{{classname}}{{^@last}}, {{/@last}}{{#@last}}{{/@last}}{{/interfaceModels}}{{/serializableModel}} { +{{#serializableModel}} + private static final long serialVersionUID = 1L; + +{{/serializableModel}} +{{#if interfaceModels}} + + @Getter(value = lombok.AccessLevel.NONE) + @JsonProperty(value = "__type", defaultValue = "{{classname}}") @Builder.Default + private String __type = "{{classname}}"; + + /** + * Name of this subclass in SimpleClassName format + * @return __type + **/ + @Schema(required = true, description = "Name of this subclass in SimpleClassName format", allowableValues = {"{{classname}}"}, + defaultValue = "{{classname}}") + @NotNull + public String get__type() { + return __type; + } +{{/if}} + + {{#vars}} + {{#baseItems this}} + {{#isEnum}} +{{>enumClass}} + {{/isEnum}} + {{/baseItems}} + {{#jackson}} + {{#vendorExtensions.x-is-discriminator-property}} + @JsonTypeId + {{/vendorExtensions.x-is-discriminator-property}} + {{^vendorExtensions.x-is-discriminator-property}} + @JsonProperty("{{baseName}}") @Builder.Default{{#withXml}} + @JacksonXmlProperty({{#isXmlAttribute}}isAttribute = true, {{/isXmlAttribute}}{{#xmlNamespace}}namespace="{{xmlNamespace}}", {{/xmlNamespace}}localName = "{{#xmlName}}{{xmlName}}{{/xmlName}}{{^xmlName}}{{baseName}}{{/xmlName}}"){{/withXml}} + {{/vendorExtensions.x-is-discriminator-property}} + {{/jackson}} + {{#gson}} + @SerializedName("{{baseName}}") + {{/gson}} + {{#isContainer}} + {{#useBeanValidation}}@Valid{{/useBeanValidation}} + private {{{datatypeWithEnum}}} {{name}}{{#required}} = {{{defaultValue}}}{{/required}}{{^required}} = null{{/required}}; + {{/isContainer}} + {{^isContainer}} + private {{{datatypeWithEnum}}} {{name}} = {{{defaultValue}}}; + {{/isContainer}} + + {{/vars}} + {{#vars}} + public {{classname}} {{name}}({{{datatypeWithEnum}}} {{name}}) { + this.{{name}} = {{name}}; + return this; + } + {{#isListContainer}} + + public {{classname}} add{{nameInCamelCase}}Item({{{items.datatypeWithEnum}}} {{name}}Item) { + {{^required}} + if (this.{{name}} == null) { + this.{{name}} = {{{defaultValue}}}; + } + {{/required}} + this.{{name}}.add({{name}}Item); + return this; + } + {{/isListContainer}} + {{#isMapContainer}} + + public {{classname}} put{{nameInCamelCase}}Item(String key, {{{items.datatypeWithEnum}}} {{name}}Item) { + {{^required}} + if (this.{{name}} == null) { + this.{{name}} = {{{defaultValue}}}; + } + {{/required}} + this.{{name}}.put(key, {{name}}Item); + return this; + } + {{/isMapContainer}} + + /** + {{#description}} + * {{{description}}} + {{/description}} + {{^description}} + * Get {{name}} + {{/description}} + {{#minimum}} + * minimum: {{minimum}} + {{/minimum}} + {{#maximum}} + * maximum: {{maximum}} + {{/maximum}} + * @return {{name}} + **/ + {{#vendorExtensions.extraAnnotation}} + {{{vendorExtensions.extraAnnotation}}} + {{/vendorExtensions.extraAnnotation}} + {{#useOas2}} + @ApiModelProperty({{#example}}example = "{{{example}}}", {{/example}}{{#required}}required = {{required}}, {{/required}}{{#isReadOnly}}readOnly = {{{isReadOnly}}}, 
{{/isReadOnly}}value = "{{{description}}}") + {{/useOas2}} + {{^useOas2}} + @Schema({{#example}}example = "{{{example}}}", {{/example}}{{#required}}required = {{required}}, {{/required}}{{#isReadOnly}}accessMode = Schema.AccessMode.READ_ONLY, {{/isReadOnly}}description = "{{{description}}}") + {{/useOas2}} + {{#useBeanValidation}}{{>beanValidation}}{{/useBeanValidation}} public {{{datatypeWithEnum}}} {{getter}}() { + return {{name}}; + } + + public void {{setter}}({{{datatypeWithEnum}}} {{name}}) { + this.{{name}} = {{name}}; + } + + {{/vars}} + + @Override + public boolean equals(java.lang.Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + }{{#hasVars}} + {{classname}} {{classVarName}} = ({{classname}}) o; + return {{#vars}}Objects.equals(this.{{name}}, {{classVarName}}.{{name}}){{#hasMore}} && + {{/hasMore}}{{/vars}}{{#parent}} && + super.equals(o){{/parent}};{{/hasVars}}{{^hasVars}} + return true;{{/hasVars}} + } + + @Override + public int hashCode() { + return Objects.hash({{#vars}}{{name}}{{#hasMore}}, {{/hasMore}}{{/vars}}{{#parent}}{{#hasVars}}, {{/hasVars}}super.hashCode(){{/parent}}); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("class {{classname}} {\n"); + {{#parent}}sb.append(" ").append(toIndentedString(super.toString())).append("\n");{{/parent}} + {{#vars}}sb.append(" {{name}}: ").append(toIndentedString({{name}})).append("\n"); + {{/vars}}sb.append("}"); + return sb.toString(); + } + + /** + * Convert the given object to string with each line indented by 4 spaces + * (except the first line). + */ + private String toIndentedString(java.lang.Object o) { + if (o == null) { + return "null"; + } + return o.toString().replace("\n", "\n "); + } +} \ No newline at end of file diff --git a/metadata-models/src/main/resources/JavaSpring/readme.txt b/metadata-models/src/main/resources/JavaSpring/readme.txt new file mode 100644 index 0000000000000..f127cd3e68553 --- /dev/null +++ b/metadata-models/src/main/resources/JavaSpring/readme.txt @@ -0,0 +1,2 @@ +Original: +https://github.com/swagger-api/swagger-codegen-generators/tree/master/src/main/resources/handlebars/JavaSpring \ No newline at end of file diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 5f54525e1e862..56fc5f6568eb7 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -100,6 +100,7 @@ entities: - dataProcessInstanceRelationships - dataProcessInstanceRunEvent - name: chart + category: core keyAspect: chartKey aspects: - chartInfo @@ -183,6 +184,7 @@ entities: - origin - name: domain doc: A data domain within an organization. + category: core keyAspect: domainKey aspects: - domainProperties @@ -190,6 +192,7 @@ entities: - ownership - name: container doc: A container of related data assets. 
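+  # Note: "category: core" appears to mark the entities that get generated v2 OpenAPI models and endpoints (assumption based on the codegen changes in this patch).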
+ category: core keyAspect: containerKey aspects: - containerProperties @@ -206,6 +209,7 @@ entities: - domains - browsePathsV2 - name: tag + category: core keyAspect: tagKey aspects: - tagProperties @@ -213,6 +217,7 @@ entities: - deprecation - status - name: glossaryTerm + category: core keyAspect: glossaryTermKey aspects: - glossaryTermInfo @@ -225,6 +230,7 @@ entities: - status - browsePaths - name: glossaryNode + category: core keyAspect: glossaryNodeKey aspects: - glossaryNodeInfo diff --git a/metadata-service/health-servlet/src/main/java/com/datahub/health/config/SpringWebConfig.java b/metadata-service/health-servlet/src/main/java/com/datahub/health/config/SpringWebConfig.java index 4354ef04b9d8f..76d9a6744c4cf 100644 --- a/metadata-service/health-servlet/src/main/java/com/datahub/health/config/SpringWebConfig.java +++ b/metadata-service/health-servlet/src/main/java/com/datahub/health/config/SpringWebConfig.java @@ -1,6 +1,7 @@ package com.datahub.health.config; import io.swagger.v3.oas.annotations.OpenAPIDefinition; +import io.swagger.v3.oas.annotations.info.Info; import io.swagger.v3.oas.annotations.servers.Server; import java.util.List; import org.springframework.context.annotation.Configuration; @@ -15,7 +16,8 @@ @EnableWebMvc -@OpenAPIDefinition(servers = {@Server(url = "/health/", description = "Default Server URL")}) +@OpenAPIDefinition(info = @Info(title = "DataHub OpenAPI", version = "1.0.0"), + servers = {@Server(url = "/health/", description = "Default Server URL")}) @Configuration public class SpringWebConfig implements WebMvcConfigurer { diff --git a/metadata-service/openapi-analytics-servlet/build.gradle b/metadata-service/openapi-analytics-servlet/build.gradle new file mode 100644 index 0000000000000..6475d215db5f5 --- /dev/null +++ b/metadata-service/openapi-analytics-servlet/build.gradle @@ -0,0 +1,67 @@ +plugins { + id 'java' + id 'org.hidetake.swagger.generator' +} + +dependencies { + + implementation project(':metadata-auth:auth-api') + implementation project(':metadata-service:auth-impl') + implementation project(':metadata-service:factories') + implementation project(':metadata-service:openapi-servlet') + implementation project(':metadata-models') + + implementation externalDependency.springBoot + implementation externalDependency.springCore + implementation externalDependency.springDocUI + implementation externalDependency.springWeb + implementation externalDependency.springWebMVC + implementation externalDependency.springBeans + implementation externalDependency.springContext + + implementation externalDependency.reflections + implementation externalDependency.slf4jApi + compileOnly externalDependency.lombok + + implementation externalDependency.antlr4Runtime + implementation externalDependency.antlr4 + + annotationProcessor externalDependency.lombok + + testImplementation externalDependency.testng + testImplementation externalDependency.mockito + testImplementation externalDependency.springBootTest + + swaggerCodegen externalDependency.swaggerCli +} + +sourceSets { + main { + java { + srcDirs = ["$buildDir/openapi/generated/src/main/java", 'src/main/java'] + } + } +} + +// https://github.com/int128/gradle-swagger-generator-plugin#task-type-generateswaggercode +task openApiGenerate(type: GenerateSwaggerCode) { + inputFile = file("$projectDir/src/main/resources/open-api.yaml") + outputDir = file("$buildDir/openapi/generated") + + language = 'spring' + + components = ["apis"] + templateDir = file("$projectDir/src/main/resources/JavaSpring") + 
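+    // These properties are handed straight to the swagger-codegen Spring generator; 'delegatePattern' (below) makes it emit *ApiDelegate interfaces that the hand-written classes implement.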
additionalProperties = [ + 'group-id' : "io.datahubproject", + 'dateLibrary' : "java8", + 'java11' : "true", + 'modelPropertyNaming': "original", + 'modelPackage' : "io.datahubproject.openapi.generated", + 'apiPackage' : "io.datahubproject.openapi.generated.controller", + 'delegatePattern' : "true" + ] +} +tasks.getByName("compileJava").dependsOn(openApiGenerate) + +checkstyleMain.exclude '**/generated/**' \ No newline at end of file
diff --git a/metadata-service/openapi-analytics-servlet/src/main/java/io/datahubproject/openapi/config/OpenapiAnalyticsConfig.java b/metadata-service/openapi-analytics-servlet/src/main/java/io/datahubproject/openapi/config/OpenapiAnalyticsConfig.java new file mode 100644 index 0000000000000..7816e81fe4a6d --- /dev/null +++ b/metadata-service/openapi-analytics-servlet/src/main/java/io/datahubproject/openapi/config/OpenapiAnalyticsConfig.java @@ -0,0 +1,14 @@ +package io.datahubproject.openapi.config; + +import io.datahubproject.openapi.delegates.DatahubUsageEventsImpl; +import io.datahubproject.openapi.generated.controller.DatahubUsageEventsApiDelegate; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class OpenapiAnalyticsConfig { + @Bean + public DatahubUsageEventsApiDelegate datahubUsageEventsApiDelegate() { + return new DatahubUsageEventsImpl(); + } +}
diff --git a/metadata-service/openapi-analytics-servlet/src/main/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImpl.java b/metadata-service/openapi-analytics-servlet/src/main/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImpl.java new file mode 100644 index 0000000000000..99e47f32555df --- /dev/null +++ b/metadata-service/openapi-analytics-servlet/src/main/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImpl.java @@ -0,0 +1,48 @@ +package io.datahubproject.openapi.delegates; + +import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; +import io.datahubproject.openapi.generated.controller.DatahubUsageEventsApiDelegate; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.http.ResponseEntity; +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationContext; +import com.datahub.authorization.ConjunctivePrivilegeGroup; +import com.datahub.authorization.DisjunctivePrivilegeGroup; +import com.datahub.authorization.AuthorizerChain; +import org.springframework.beans.factory.annotation.Value; +import com.google.common.collect.ImmutableList; +import io.datahubproject.openapi.exception.UnauthorizedException; +import com.datahub.authorization.AuthUtil; +import com.linkedin.metadata.authorization.PoliciesConfig; + +import java.util.Optional; +import java.util.Objects; + +public class DatahubUsageEventsImpl implements DatahubUsageEventsApiDelegate { + + @Autowired + private ElasticSearchService _searchService; + @Autowired + private AuthorizerChain _authorizationChain; + @Value("${authorization.restApiAuthorization:false}") + private boolean _restApiAuthorizationEnabled; + + final public static String DATAHUB_USAGE_INDEX = "datahub_usage_event"; + + @Override + public ResponseEntity<String> raw(String body) { + Authentication authentication = AuthenticationContext.getAuthentication(); + checkAnalyticsAuthorized(authentication); + return ResponseEntity.of(_searchService.raw(DATAHUB_USAGE_INDEX, body).map(Objects::toString)); + } + + private void checkAnalyticsAuthorized(Authentication authentication) {
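+    // Only enforced when REST API authorization is enabled; resolves the GET_ANALYTICS privilege through the authorizer chain.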
String actorUrnStr = authentication.getActor().toUrnStr(); + DisjunctivePrivilegeGroup orGroup = new DisjunctivePrivilegeGroup(ImmutableList.of(new ConjunctivePrivilegeGroup( + ImmutableList.of(PoliciesConfig.GET_ANALYTICS_PRIVILEGE.getType())))); + + if (_restApiAuthorizationEnabled && !AuthUtil.isAuthorized(_authorizationChain, actorUrnStr, Optional.empty(), orGroup)) { + throw new UnauthorizedException(actorUrnStr + " is unauthorized to get analytics."); + } + } +} diff --git a/metadata-service/openapi-analytics-servlet/src/main/resources/JavaSpring/apiController.mustache b/metadata-service/openapi-analytics-servlet/src/main/resources/JavaSpring/apiController.mustache new file mode 100644 index 0000000000000..6b22940f0e7ed --- /dev/null +++ b/metadata-service/openapi-analytics-servlet/src/main/resources/JavaSpring/apiController.mustache @@ -0,0 +1,160 @@ +package {{package}}; + +{{^isJava8or11}} +{{#imports}}import {{import}}; +{{/imports}} +{{/isJava8or11}} +{{^isDelegate}} +import com.fasterxml.jackson.databind.ObjectMapper; +{{/isDelegate}} +{{^isJava8or11}} +import io.swagger.annotations.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +{{/isJava8or11}} +import org.springframework.stereotype.Controller; +import org.springframework.web.bind.annotation.RequestMapping; +{{^isJava8or11}} +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestHeader; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.multipart.MultipartFile; + +{{#useBeanValidation}} +{{#jakarta}} +import jakarta.validation.Valid; +import jakarta.validation.constraints.*; +{{/jakarta}} +{{^jakarta}} +import javax.validation.Valid; +import javax.validation.constraints.*; +{{/jakarta}} +{{/useBeanValidation}} +{{/isJava8or11}} +{{^isDelegate}} +{{#jakarta}} +import jakarta.servlet.http.HttpServletRequest; +{{/jakarta}} +{{^jakarta}} +import javax.servlet.http.HttpServletRequest; +{{/jakarta}} + {{#isJava8or11}} +import java.util.Optional; + {{/isJava8or11}} +{{/isDelegate}} +{{^jdk8-no-delegate}} + {{#useOptional}} +import java.util.Optional; + {{/useOptional}} +{{/jdk8-no-delegate}} +{{^isJava8or11}} + {{^isDelegate}} +import java.io.IOException; + {{/isDelegate}} +import java.util.List; + {{#async}} +import java.util.concurrent.Callable; + {{/async}} +{{/isJava8or11}} +{{>generatedAnnotation}} +@Controller +@RequestMapping("/v1/analytics") +{{#operations}} +public class {{classname}}Controller implements {{classname}} { + +{{#isDelegate}} + private final {{classname}}Delegate delegate; + + @org.springframework.beans.factory.annotation.Autowired + public {{classname}}Controller({{classname}}Delegate delegate) { + this.delegate = delegate; + } + {{#isJava8or11}} + + @Override + public {{classname}}Delegate getDelegate() { + return delegate; + } + {{/isJava8or11}} +{{/isDelegate}} +{{^isDelegate}} + {{^isJava8or11}} + private static final Logger log = LoggerFactory.getLogger({{classname}}Controller.class); + + {{/isJava8or11}} + private final ObjectMapper objectMapper; + + private final HttpServletRequest request; + + @org.springframework.beans.factory.annotation.Autowired + public {{classname}}Controller(ObjectMapper objectMapper, HttpServletRequest request) { + this.objectMapper = objectMapper; 
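+    // The generated controller keeps the ObjectMapper and current request so stub operations can serialize example responses according to the Accept header.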
+ this.request = request; + } + {{#isJava8or11}} + + @Override + public Optional<ObjectMapper> getObjectMapper() { + return Optional.ofNullable(objectMapper); + } + + @Override + public Optional<HttpServletRequest> getRequest() { + return Optional.ofNullable(request); + } + {{/isJava8or11}} + +{{/isDelegate}} +{{^isJava8or11}} +{{#operation}} + public {{#async}}Callable<{{/async}}ResponseEntity<{{>returnTypes}}>{{#async}}>{{/async}} {{operationId}}({{#allParams}}{{>queryParams}}{{>pathParams}}{{>headerParams}}{{>bodyParams}}{{>formParams}}{{#hasMore}},{{/hasMore}}{{/allParams}}) { + {{^isDelegate}} + {{^async}} + String accept = request.getHeader("Accept"); + {{#examples}} + if (accept != null && accept.contains("{{{contentType}}}")) { + try { + return new ResponseEntity<{{>returnTypes}}>(objectMapper.readValue("{{#lambdaRemoveLineBreak}}{{#lambdaEscapeDoubleQuote}}{{{example}}}{{/lambdaEscapeDoubleQuote}}{{/lambdaRemoveLineBreak}}", {{>exampleReturnTypes}}.class), HttpStatus.NOT_IMPLEMENTED); + } catch (IOException e) { + log.error("Couldn't serialize response for content type {{{contentType}}}", e); + return new ResponseEntity<{{>returnTypes}}>(HttpStatus.INTERNAL_SERVER_ERROR); + } + } + + {{/examples}} + return new ResponseEntity<{{>returnTypes}}>(HttpStatus.NOT_IMPLEMENTED); + {{/async}} + {{#async}} + return new Callable<ResponseEntity<{{>returnTypes}}>>() { + @Override + public ResponseEntity<{{>returnTypes}}> call() { + String accept = request.getHeader("Accept"); + {{#examples}} + if (accept != null && accept.contains("{{{contentType}}}")) { + try { + return new ResponseEntity<{{>returnTypes}}>(objectMapper.readValue("{{#lambdaRemoveLineBreak}}{{#lambdaEscapeDoubleQuote}}{{{example}}}{{/lambdaEscapeDoubleQuote}}{{/lambdaRemoveLineBreak}}", {{>exampleReturnTypes}}.class), HttpStatus.NOT_IMPLEMENTED); + } catch (IOException e) { + log.error("Couldn't serialize response for content type {{{contentType}}}", e); + return new ResponseEntity<{{>returnTypes}}>(HttpStatus.INTERNAL_SERVER_ERROR); + } + } + + {{/examples}} + return new ResponseEntity<{{>returnTypes}}>(HttpStatus.NOT_IMPLEMENTED); + } + }; + {{/async}} + {{/isDelegate}} + {{#isDelegate}} + return delegate.{{operationId}}({{#allParams}}{{paramName}}{{#hasMore}}, {{/hasMore}}{{/allParams}}); + {{/isDelegate}} + } + +{{/operation}} +{{/isJava8or11}} +} +{{/operations}} \ No newline at end of file
diff --git a/metadata-service/openapi-analytics-servlet/src/main/resources/JavaSpring/readme.txt b/metadata-service/openapi-analytics-servlet/src/main/resources/JavaSpring/readme.txt new file mode 100644 index 0000000000000..f127cd3e68553 --- /dev/null +++ b/metadata-service/openapi-analytics-servlet/src/main/resources/JavaSpring/readme.txt @@ -0,0 +1,2 @@ +Original: +https://github.com/swagger-api/swagger-codegen-generators/tree/master/src/main/resources/handlebars/JavaSpring \ No newline at end of file
diff --git a/metadata-service/openapi-analytics-servlet/src/main/resources/open-api.yaml b/metadata-service/openapi-analytics-servlet/src/main/resources/open-api.yaml new file mode 100644 index 0000000000000..3c756b100699f --- /dev/null +++ b/metadata-service/openapi-analytics-servlet/src/main/resources/open-api.yaml @@ -0,0 +1,31 @@ +openapi: "3.0.0" +info: + title: Analytics API + description: This is a service for DataHub Analytics. + version: v1 + +paths: + /datahub_usage_events/_search: + post: + summary: Raw datahub_usage_event data.
(Experimental) + operationId: "raw" + tags: + - "DataHub Usage" + requestBody: + content: + application/json: + schema: # Request body contents + type: string + example: >- + { + "query": { + "match_all": {} + } + } + responses: + '200': + description: "Success" + content: + application/json: + schema: + type: string diff --git a/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIAnalyticsTestConfiguration.java b/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIAnalyticsTestConfiguration.java new file mode 100644 index 0000000000000..98f0db8fd10ef --- /dev/null +++ b/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIAnalyticsTestConfiguration.java @@ -0,0 +1,48 @@ +package io.datahubproject.openapi.config; + +import com.datahub.authentication.Actor; +import com.datahub.authentication.ActorType; +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationContext; +import com.datahub.authorization.AuthorizationResult; +import com.datahub.authorization.AuthorizerChain; +import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; +import org.elasticsearch.action.search.SearchResponse; +import org.mockito.Mockito; +import org.springframework.boot.test.context.TestConfiguration; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Primary; + +import java.io.IOException; +import java.util.Optional; + +import static io.datahubproject.openapi.delegates.DatahubUsageEventsImpl.DATAHUB_USAGE_INDEX; +import static org.mockito.ArgumentMatchers.*; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + + +@TestConfiguration +public class OpenAPIAnalyticsTestConfiguration { + @Bean + @Primary + public ElasticSearchService datahubUsageEventsApiDelegate() throws IOException { + ElasticSearchService elasticSearchService = mock(ElasticSearchService.class); + SearchResponse mockResp = mock(SearchResponse.class); + when(elasticSearchService.raw(eq(DATAHUB_USAGE_INDEX), anyString())) + .thenReturn(Optional.of(mockResp)); + return elasticSearchService; + } + + @Bean + public AuthorizerChain authorizerChain() { + AuthorizerChain authorizerChain = Mockito.mock(AuthorizerChain.class); + + Authentication authentication = Mockito.mock(Authentication.class); + when(authentication.getActor()).thenReturn(new Actor(ActorType.USER, "datahub")); + when(authorizerChain.authorize(any())).thenReturn(new AuthorizationResult(null, AuthorizationResult.Type.ALLOW, "")); + AuthenticationContext.setAuthentication(authentication); + + return authorizerChain; + } +} diff --git a/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImplTest.java b/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImplTest.java new file mode 100644 index 0000000000000..af2a24391fea8 --- /dev/null +++ b/metadata-service/openapi-analytics-servlet/src/test/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImplTest.java @@ -0,0 +1,44 @@ +package io.datahubproject.openapi.delegates; + +import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import io.datahubproject.openapi.config.OpenAPIAnalyticsTestConfiguration; +import io.datahubproject.openapi.config.SpringWebConfig; +import 
io.datahubproject.openapi.generated.controller.DatahubUsageEventsApiController; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.context.annotation.ComponentScan; +import org.springframework.context.annotation.Import; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; + + +@SpringBootTest(classes = {SpringWebConfig.class}) +@ComponentScan(basePackages = {"io.datahubproject.openapi.generated.controller"}) +@Import({DatahubUsageEventsImpl.class, OpenAPIAnalyticsTestConfiguration.class}) +public class DatahubUsageEventsImplTest extends AbstractTestNGSpringContextTests { + @BeforeTest + public void disableAssert() { + PathSpecBasedSchemaAnnotationVisitor.class.getClassLoader() + .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); + } + + @Autowired + private DatahubUsageEventsApiController analyticsController; + + @Test + public void initTest() { + assertNotNull(analyticsController); + } + + @Test + public void analyticsControllerTest() { + ResponseEntity<String> resp = analyticsController.raw(""); + assertEquals(resp.getStatusCode(), HttpStatus.OK); + } +}
diff --git a/metadata-service/openapi-entity-servlet/build.gradle b/metadata-service/openapi-entity-servlet/build.gradle new file mode 100644 index 0000000000000..7f9c472b91fac --- /dev/null +++ b/metadata-service/openapi-entity-servlet/build.gradle @@ -0,0 +1,82 @@ +plugins { + id 'java' + id 'org.hidetake.swagger.generator' +} + +dependencies { + + implementation project(':metadata-auth:auth-api') + implementation project(':metadata-service:auth-impl') + implementation project(':metadata-service:factories') + implementation project(':metadata-service:openapi-servlet') + implementation project(':metadata-models') + + implementation externalDependency.springBoot + implementation externalDependency.springCore + implementation externalDependency.springDocUI + implementation externalDependency.springWeb + implementation externalDependency.springWebMVC + implementation externalDependency.springBeans + implementation externalDependency.springContext + + implementation externalDependency.reflections + implementation externalDependency.slf4jApi + compileOnly externalDependency.lombok + + implementation externalDependency.antlr4Runtime + implementation externalDependency.antlr4 + + annotationProcessor externalDependency.lombok + + testImplementation externalDependency.testng + testImplementation externalDependency.mockito + testImplementation externalDependency.springBootTest + + swaggerCodegen externalDependency.swaggerCli + swaggerCodegen project(':metadata-service:openapi-entity-servlet:generators') +} + +sourceSets { + main { + java { + srcDirs = ["$buildDir/openapi/generated/src/main/java", 'src/main/java'] + } + } +} + + +task mergeApiComponents(dependsOn: ':metadata-models:generateJsonSchema') { + doLast { + mkdir("$buildDir/openapi") + File combined = file("$buildDir/openapi/open-api.yaml") + def components = file("${project(':metadata-models').projectDir}/src/generatedJsonSchema/combined/open-api-components.yaml").getText('UTF-8') + def api = file("$projectDir/src/main/resources/entity-v2.0.yml").getText('UTF-8')
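+    // Concatenates the generated components, generated entity paths, and this hand-written API skeleton into a single OpenAPI document; stray YAML document separators ("---") are stripped below.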
+ def paths = file("${project(':metadata-models').projectDir}/src/generatedJsonSchema/combined/open-api-paths.yaml").getText('UTF-8') + combined.text = (components + paths + api).replaceAll("---\n", "\n") + } + outputs.file(file("$buildDir/openapi/open-api.yaml")) +} + +// https://github.com/int128/gradle-swagger-generator-plugin#task-type-generateswaggercode +task openApiGenerate(type: GenerateSwaggerCode, dependsOn: [mergeApiComponents, ':metadata-service:openapi-entity-servlet:generators:jar']) { + inputFile = file("$buildDir/openapi/open-api.yaml") + outputDir = file("$buildDir/openapi/generated") + + // custom generator class + language = 'io.datahubproject.CustomSpringCodegen' + + components = ["apis"] + templateDir = file("$projectDir/src/main/resources/JavaSpring") + additionalProperties = [ + 'group-id' : "io.datahubproject", + 'dateLibrary' : "java8", + 'java11' : "true", + 'modelPropertyNaming': "original", + 'modelPackage' : "io.datahubproject.openapi.generated", + 'apiPackage' : "io.datahubproject.openapi.generated.controller", + 'delegatePattern' : "false" + ] +} +tasks.getByName("compileJava").dependsOn(openApiGenerate) + +checkstyleMain.exclude '**/generated/**' \ No newline at end of file
diff --git a/metadata-service/openapi-entity-servlet/generators/build.gradle b/metadata-service/openapi-entity-servlet/generators/build.gradle new file mode 100644 index 0000000000000..cb54ae0d9fc05 --- /dev/null +++ b/metadata-service/openapi-entity-servlet/generators/build.gradle @@ -0,0 +1,10 @@ +plugins { + id 'java' +} + +dependencies { + implementation externalDependency.swaggerCli + + compileOnly externalDependency.lombok + annotationProcessor externalDependency.lombok +} \ No newline at end of file
diff --git a/metadata-service/openapi-entity-servlet/generators/src/main/java/io/datahubproject/CustomSpringCodegen.java b/metadata-service/openapi-entity-servlet/generators/src/main/java/io/datahubproject/CustomSpringCodegen.java new file mode 100644 index 0000000000000..ef36d8aa38785 --- /dev/null +++ b/metadata-service/openapi-entity-servlet/generators/src/main/java/io/datahubproject/CustomSpringCodegen.java @@ -0,0 +1,43 @@ +package io.datahubproject; + +import io.swagger.codegen.v3.generators.java.SpringCodegen; +import lombok.extern.slf4j.Slf4j; + +import java.util.List; +import java.util.Map; + + +@Slf4j +public class CustomSpringCodegen extends SpringCodegen { + + public CustomSpringCodegen() { + super(); + } + + @Override + public String getName() { + return "custom-spring"; + } + + @Override + public Map<String, Object> postProcessOperations(Map<String, Object> objs) { + Map<String, Object> result = super.postProcessOperations(objs); + List<Map<String, String>> imports = (List) objs.get("imports"); + + for (Map<String, String> importMap : imports) { + for (String type : importMap.values()) { + if (type.contains("EntityRequest") && !type.contains(".Scroll")) { + additionalProperties.put("requestClass", type); + } + if (type.contains("EntityResponse") && !type.contains(".Scroll")) { + additionalProperties.put("responseClass", type); + } + if (type.contains("EntityResponse") && type.contains(".Scroll")) { + additionalProperties.put("scrollResponseClass", type); + } + } + } + + return result; + } +}
diff --git a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java new file mode 100644 index 0000000000000..5d1065e80d419 --- /dev/null +++
b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java @@ -0,0 +1,411 @@ +package io.datahubproject.openapi.delegates; + +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.entity.EntityService; +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationContext; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.SearchFlags; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.search.SearchEntity; +import com.linkedin.metadata.search.SearchService; +import io.datahubproject.openapi.dto.UpsertAspectRequest; +import io.datahubproject.openapi.dto.UrnResponseMap; +import io.datahubproject.openapi.entities.EntitiesController; +import com.datahub.authorization.AuthorizerChain; +import io.datahubproject.openapi.generated.BrowsePathsV2AspectRequestV2; +import io.datahubproject.openapi.generated.BrowsePathsV2AspectResponseV2; +import io.datahubproject.openapi.generated.DeprecationAspectRequestV2; +import io.datahubproject.openapi.generated.DeprecationAspectResponseV2; +import io.datahubproject.openapi.generated.DomainsAspectRequestV2; +import io.datahubproject.openapi.generated.DomainsAspectResponseV2; +import io.datahubproject.openapi.generated.GlobalTagsAspectRequestV2; +import io.datahubproject.openapi.generated.GlobalTagsAspectResponseV2; +import io.datahubproject.openapi.generated.GlossaryTermsAspectRequestV2; +import io.datahubproject.openapi.generated.GlossaryTermsAspectResponseV2; +import io.datahubproject.openapi.generated.OwnershipAspectRequestV2; +import io.datahubproject.openapi.generated.OwnershipAspectResponseV2; +import io.datahubproject.openapi.generated.SortOrder; +import io.datahubproject.openapi.generated.StatusAspectRequestV2; +import io.datahubproject.openapi.generated.StatusAspectResponseV2; +import io.datahubproject.openapi.exception.UnauthorizedException; +import io.datahubproject.openapi.util.OpenApiEntitiesUtil; +import com.datahub.authorization.ConjunctivePrivilegeGroup; +import com.datahub.authorization.DisjunctivePrivilegeGroup; +import com.linkedin.metadata.models.EntitySpec; +import com.datahub.authorization.ResourceSpec; +import com.linkedin.metadata.authorization.PoliciesConfig; +import com.google.common.collect.ImmutableList; +import com.datahub.authorization.AuthUtil; +import org.springframework.http.HttpEntity; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; + +import javax.validation.Valid; +import javax.validation.constraints.Min; +import java.net.URISyntaxException; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static io.datahubproject.openapi.util.ReflectionCache.toLowerFirst; + +public class EntityApiDelegateImpl { + final private EntityRegistry _entityRegistry; + final private EntityService _entityService; + final private SearchService _searchService; + final private EntitiesController _v1Controller; + final private AuthorizerChain _authorizationChain; + + final private boolean _restApiAuthorizationEnabled; + final private Class _reqClazz; + final private Class _respClazz; + final private Class _scrollRespClazz; + + final private StackWalker walker = StackWalker.getInstance(); + + public EntityApiDelegateImpl(EntityService 
entityService, SearchService searchService, EntitiesController entitiesController, + boolean restApiAuthorizationEnabled, AuthorizerChain authorizationChain, + Class reqClazz, Class respClazz, Class scrollRespClazz) { + this._entityService = entityService; + this._searchService = searchService; + this._entityRegistry = entityService.getEntityRegistry(); + this._v1Controller = entitiesController; + this._authorizationChain = authorizationChain; + this._restApiAuthorizationEnabled = restApiAuthorizationEnabled; + this._reqClazz = reqClazz; + this._respClazz = respClazz; + this._scrollRespClazz = scrollRespClazz; + } + + public ResponseEntity get(String urn, Boolean systemMetadata, List aspects) { + String[] requestedAspects = Optional.ofNullable(aspects).map(asp -> asp.stream().distinct().toArray(String[]::new)).orElse(null); + ResponseEntity result = _v1Controller.getEntities(new String[]{urn}, requestedAspects); + return ResponseEntity.of(OpenApiEntitiesUtil.convertEntity(Optional.ofNullable(result) + .map(HttpEntity::getBody).orElse(null), _respClazz, systemMetadata)); + } + + public ResponseEntity> create(List body) { + List aspects = body.stream() + .flatMap(b -> OpenApiEntitiesUtil.convertEntityToUpsert(b, _reqClazz, _entityRegistry).stream()) + .collect(Collectors.toList()); + _v1Controller.postEntities(aspects); + List responses = body.stream() + .map(req -> OpenApiEntitiesUtil.convertToResponse(req, _respClazz, _entityRegistry)) + .collect(Collectors.toList()); + return ResponseEntity.ok(responses); + } + + public ResponseEntity delete(String urn) { + _v1Controller.deleteEntities(new String[]{urn}, false); + return new ResponseEntity<>(HttpStatus.OK); + } + + public ResponseEntity head(String urn) { + try { + Urn entityUrn = Urn.createFromString(urn); + if (_entityService.exists(entityUrn)) { + return new ResponseEntity<>(HttpStatus.NO_CONTENT); + } else { + return new ResponseEntity<>(HttpStatus.NOT_FOUND); + } + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + + public ResponseEntity getAspect(String urn, Boolean systemMetadata, String aspect, Class entityRespClass, + Class aspectRespClazz) { + String[] requestedAspects = new String[]{aspect}; + ResponseEntity result = _v1Controller.getEntities(new String[]{urn}, requestedAspects); + return ResponseEntity.of(OpenApiEntitiesUtil.convertAspect(result.getBody(), aspect, entityRespClass, aspectRespClazz, + systemMetadata)); + } + + public ResponseEntity createAspect(String urn, String aspectName, AQ body, Class reqClazz, Class respClazz) { + UpsertAspectRequest aspectUpsert = OpenApiEntitiesUtil.convertAspectToUpsert(urn, body, reqClazz); + _v1Controller.postEntities(Stream.of(aspectUpsert).filter(Objects::nonNull).collect(Collectors.toList())); + AR response = OpenApiEntitiesUtil.convertToResponseAspect(body, respClazz); + return ResponseEntity.ok(response); + } + + public ResponseEntity headAspect(String urn, String aspect) { + try { + Urn entityUrn = Urn.createFromString(urn); + if (_entityService.exists(entityUrn, aspect)) { + return new ResponseEntity<>(HttpStatus.NO_CONTENT); + } else { + return new ResponseEntity<>(HttpStatus.NOT_FOUND); + } + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + + public ResponseEntity deleteAspect(String urn, String aspect) { + _entityService.deleteAspect(urn, aspect, Map.of(), false); + _v1Controller.deleteEntities(new String[]{urn}, false); + return new ResponseEntity<>(HttpStatus.OK); + } + + public ResponseEntity 
createDomains(DomainsAspectRequestV2 body, String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return createAspect(urn, methodNameToAspectName(methodName), body, DomainsAspectRequestV2.class, DomainsAspectResponseV2.class); + } + + public ResponseEntity createGlobalTags(GlobalTagsAspectRequestV2 body, String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return createAspect(urn, methodNameToAspectName(methodName), body, GlobalTagsAspectRequestV2.class, GlobalTagsAspectResponseV2.class); + } + + public ResponseEntity createGlossaryTerms(GlossaryTermsAspectRequestV2 body, String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return createAspect(urn, methodNameToAspectName(methodName), body, GlossaryTermsAspectRequestV2.class, GlossaryTermsAspectResponseV2.class); + } + + public ResponseEntity createOwnership(OwnershipAspectRequestV2 body, String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return createAspect(urn, methodNameToAspectName(methodName), body, OwnershipAspectRequestV2.class, OwnershipAspectResponseV2.class); + } + + public ResponseEntity createStatus(StatusAspectRequestV2 body, String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return createAspect(urn, methodNameToAspectName(methodName), body, StatusAspectRequestV2.class, StatusAspectResponseV2.class); + } + + public ResponseEntity deleteDomains(String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return deleteAspect(urn, methodNameToAspectName(methodName)); + } + + public ResponseEntity deleteGlobalTags(String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return deleteAspect(urn, methodNameToAspectName(methodName)); + } + + public ResponseEntity deleteGlossaryTerms(String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return deleteAspect(urn, methodNameToAspectName(methodName)); + } + + public ResponseEntity deleteOwnership(String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return deleteAspect(urn, methodNameToAspectName(methodName)); + } + + public ResponseEntity deleteStatus(String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return deleteAspect(urn, methodNameToAspectName(methodName)); + } + + public ResponseEntity getDomains(String urn, Boolean systemMetadata) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return getAspect(urn, systemMetadata, methodNameToAspectName(methodName), _respClazz, + DomainsAspectResponseV2.class); + } + + public ResponseEntity getGlobalTags(String urn, Boolean systemMetadata) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return getAspect(urn, systemMetadata, methodNameToAspectName(methodName), _respClazz, + 
GlobalTagsAspectResponseV2.class); + } + + public ResponseEntity getGlossaryTerms(String urn, Boolean systemMetadata) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return getAspect(urn, systemMetadata, methodNameToAspectName(methodName), _respClazz, + GlossaryTermsAspectResponseV2.class); + } + + public ResponseEntity getOwnership(String urn, Boolean systemMetadata) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return getAspect(urn, systemMetadata, methodNameToAspectName(methodName), _respClazz, + OwnershipAspectResponseV2.class); + } + + public ResponseEntity getStatus(String urn, Boolean systemMetadata) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return getAspect(urn, systemMetadata, methodNameToAspectName(methodName), _respClazz, + StatusAspectResponseV2.class); + } + + public ResponseEntity headDomains(String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return headAspect(urn, methodNameToAspectName(methodName)); + } + + public ResponseEntity headGlobalTags(String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return headAspect(urn, methodNameToAspectName(methodName)); + } + + public ResponseEntity headGlossaryTerms(String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return headAspect(urn, methodNameToAspectName(methodName)); + } + + public ResponseEntity headOwnership(String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return headAspect(urn, methodNameToAspectName(methodName)); + } + + public ResponseEntity headStatus(String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return headAspect(urn, methodNameToAspectName(methodName)); + } + + protected static String methodNameToAspectName(String methodName) { + return toLowerFirst(methodName.replaceFirst("^(get|head|delete|create)", "")); + } + + public ResponseEntity deleteDeprecation(String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return deleteAspect(urn, methodNameToAspectName(methodName)); + } + + public ResponseEntity deleteBrowsePathsV2(String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return deleteAspect(urn, methodNameToAspectName(methodName)); + } + + public ResponseEntity getDeprecation(String urn, @Valid Boolean systemMetadata) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return getAspect(urn, systemMetadata, methodNameToAspectName(methodName), _respClazz, + DeprecationAspectResponseV2.class); + } + + public ResponseEntity headDeprecation(String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return headAspect(urn, methodNameToAspectName(methodName)); + } + + public ResponseEntity createDeprecation(@Valid DeprecationAspectRequestV2 body, String urn) { + String 
methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return createAspect(urn, methodNameToAspectName(methodName), body, DeprecationAspectRequestV2.class, + DeprecationAspectResponseV2.class); + } + + public ResponseEntity headBrowsePathsV2(String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return headAspect(urn, methodNameToAspectName(methodName)); + } + + public ResponseEntity getBrowsePathsV2(String urn, @Valid Boolean systemMetadata) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return getAspect(urn, systemMetadata, methodNameToAspectName(methodName), _respClazz, + BrowsePathsV2AspectResponseV2.class); + } + + public ResponseEntity createBrowsePathsV2(@Valid BrowsePathsV2AspectRequestV2 body, String urn) { + String methodName = walker.walk(frames -> frames + .findFirst() + .map(StackWalker.StackFrame::getMethodName)).get(); + return createAspect(urn, methodNameToAspectName(methodName), body, BrowsePathsV2AspectRequestV2.class, + BrowsePathsV2AspectResponseV2.class); + } + + public ResponseEntity scroll(@Valid Boolean systemMetadata, @Valid List aspects, @Min(1) @Valid Integer count, + @Valid String scrollId, @Valid List sort, @Valid SortOrder sortOrder, @Valid String query) { + + Authentication authentication = AuthenticationContext.getAuthentication(); + EntitySpec entitySpec = OpenApiEntitiesUtil.responseClassToEntitySpec(_entityRegistry, _respClazz); + checkScrollAuthorized(authentication, entitySpec); + + // TODO multi-field sort + SortCriterion sortCriterion = new SortCriterion(); + sortCriterion.setField(Optional.ofNullable(sort).map(s -> s.get(0)).orElse("urn")); + sortCriterion.setOrder(com.linkedin.metadata.query.filter.SortOrder.valueOf(Optional.ofNullable(sortOrder) + .map(Enum::name).orElse("ASCENDING"))); + + SearchFlags searchFlags = new SearchFlags() + .setFulltext(false) + .setSkipAggregates(true) + .setSkipHighlighting(true); + + ScrollResult result = _searchService.scrollAcrossEntities( + List.of(entitySpec.getName()), + query, null, sortCriterion, scrollId, null, count, searchFlags); + + String[] urns = result.getEntities().stream() + .map(SearchEntity::getEntity) + .map(Urn::toString) + .toArray(String[]::new); + String[] requestedAspects = Optional.ofNullable(aspects) + .map(asp -> asp.stream().distinct().toArray(String[]::new)) + .orElse(null); + List entities = Optional.ofNullable(_v1Controller.getEntities(urns, requestedAspects).getBody()) + .map(body -> body.getResponses().entrySet()) + .map(entries -> OpenApiEntitiesUtil.convertEntities(entries, _respClazz, systemMetadata)) + .orElse(List.of()); + + return ResponseEntity.of(OpenApiEntitiesUtil.convertToScrollResponse(_scrollRespClazz, result.getScrollId(), entities)); + } + + private void checkScrollAuthorized(Authentication authentication, EntitySpec entitySpec) { + String actorUrnStr = authentication.getActor().toUrnStr(); + DisjunctivePrivilegeGroup orGroup = new DisjunctivePrivilegeGroup(ImmutableList.of(new ConjunctivePrivilegeGroup( + ImmutableList.of(PoliciesConfig.GET_ENTITY_PRIVILEGE.getType())))); + + List> resourceSpecs = List.of(Optional.of(new ResourceSpec(entitySpec.getName(), ""))); + if (_restApiAuthorizationEnabled && !AuthUtil.isAuthorizedForResources(_authorizationChain, actorUrnStr, resourceSpecs, orGroup)) { + throw new UnauthorizedException(actorUrnStr + " is 
unauthorized to get entities."); + } + } +} diff --git a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/util/OpenApiEntitiesUtil.java b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/util/OpenApiEntitiesUtil.java new file mode 100644 index 0000000000000..13c2d83343aa0 --- /dev/null +++ b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/util/OpenApiEntitiesUtil.java @@ -0,0 +1,279 @@ +package io.datahubproject.openapi.util; + +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.util.Pair; +import io.datahubproject.openapi.dto.UpsertAspectRequest; +import io.datahubproject.openapi.dto.UrnResponseMap; +import io.datahubproject.openapi.generated.EntityResponse; +import io.datahubproject.openapi.generated.OneOfGenericAspectValue; +import io.datahubproject.openapi.generated.SystemMetadata; +import lombok.extern.slf4j.Slf4j; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +import static io.datahubproject.openapi.util.ReflectionCache.toLowerFirst; +import static io.datahubproject.openapi.util.ReflectionCache.toUpperFirst; + + +@Slf4j +public class OpenApiEntitiesUtil { + private final static String MODEL_VERSION = "V2"; + private final static String REQUEST_SUFFIX = "Request" + MODEL_VERSION; + private final static String RESPONSE_SUFFIX = "Response" + MODEL_VERSION; + + private final static String ASPECT_REQUEST_SUFFIX = "Aspect" + REQUEST_SUFFIX; + private final static String ASPECT_RESPONSE_SUFFIX = "Aspect" + RESPONSE_SUFFIX; + private final static String ENTITY_REQUEST_SUFFIX = "Entity" + REQUEST_SUFFIX; + private final static String ENTITY_RESPONSE_SUFFIX = "Entity" + RESPONSE_SUFFIX; + + private OpenApiEntitiesUtil() { + } + + private final static ReflectionCache REFLECT = ReflectionCache.builder() + .basePackage("io.datahubproject.openapi.generated") + .build(); + + + public static UpsertAspectRequest convertAspectToUpsert(String entityUrn, Object aspectRequest, Class aspectRequestClazz) { + try { + UpsertAspectRequest.UpsertAspectRequestBuilder builder = UpsertAspectRequest.builder(); + builder.entityType(Urn.createFromString(entityUrn).getEntityType()); + builder.entityUrn(entityUrn); + + // i.e. GlobalTagsAspectRequestV2 + if (aspectRequest != null) { + // i.e. 
GlobalTags + Method valueMethod = REFLECT.lookupMethod(aspectRequestClazz, "getValue"); + Object aspect = valueMethod.invoke(aspectRequest); + + if (aspect != null) { + builder.aspect((OneOfGenericAspectValue) aspect); + return builder.build(); + } + } + + return null; + } catch (Exception e) { + log.error("Error reflecting urn: {} aspect: {}", entityUrn, aspectRequestClazz.getName()); + throw new RuntimeException(e); + } + } + public static List convertEntityToUpsert(Object openapiEntity, Class fromClazz, EntityRegistry entityRegistry) { + final EntitySpec entitySpec = requestClassToEntitySpec(entityRegistry, fromClazz); + + return entitySpec.getAspectSpecs().stream() + .map(aspectSpec -> { + try { + UpsertAspectRequest.UpsertAspectRequestBuilder builder = UpsertAspectRequest.builder(); + builder.entityType(entitySpec.getName()); + builder.entityUrn((String) REFLECT.lookupMethod(fromClazz, "getUrn").invoke(openapiEntity)); + + String upperAspectName = toUpperFirst(aspectSpec.getName()); + Method aspectMethod = REFLECT.lookupMethod(fromClazz, "get" + upperAspectName); + + // i.e. GlobalTagsAspectRequestV2 + Object aspectRequest = aspectMethod.invoke(openapiEntity); + if (aspectRequest != null) { + Class aspectRequestClazz = REFLECT.lookupClass(upperAspectName + ASPECT_REQUEST_SUFFIX); + + // i.e. GlobalTags + Method valueMethod = REFLECT.lookupMethod(aspectRequestClazz, "getValue"); + Object aspect = valueMethod.invoke(aspectRequest); + + if (aspect != null) { + builder.aspect((OneOfGenericAspectValue) aspect); + return builder.build(); + } + } + + return null; + } catch (Exception e) { + log.error("Error reflecting entity: {} aspect: {}", entitySpec.getName(), aspectSpec.getName()); + throw new RuntimeException(e); + } + }).filter(Objects::nonNull).collect(Collectors.toList()); + } + + public static Optional convertAspect(UrnResponseMap urnResponseMap, String aspectName, Class entityClazz, + Class aspectClazz, boolean withSystemMetadata) { + return convertEntity(urnResponseMap, entityClazz, withSystemMetadata).map(entity -> { + try { + Method aspectMethod = REFLECT.lookupMethod(entityClazz, "get" + toUpperFirst(aspectName)); + return aspectClazz.cast(aspectMethod.invoke(entity)); + } catch (IllegalAccessException | InvocationTargetException e) { + throw new RuntimeException(e); + } + }); + + } + + public static Optional convertEntity(UrnResponseMap urnResponseMap, Class toClazz, boolean withSystemMetadata) { + return Optional.ofNullable(urnResponseMap) + .flatMap(respMap -> respMap.getResponses().entrySet().stream().findFirst()) + .flatMap(entry -> convertEntities(Set.of(entry), toClazz, withSystemMetadata).stream().findFirst()); + } + + public static List convertEntities(Set> entityResponseSet, Class toClazz, boolean withSystemMetadata) { + if (entityResponseSet != null) { + return entityResponseSet.stream().map(entry -> { + try { + // i.e. 
DataContractEntityResponseV2.Builder + Pair, Object> builderPair = REFLECT.getBuilder(toClazz); + Set builderMethods = Arrays.stream(builderPair.getFirst().getMethods()) + .map(Method::getName).collect(Collectors.toSet()); + + REFLECT.lookupMethod(builderPair, "urn", String.class).invoke(builderPair.getSecond(), entry.getKey()); + + entry.getValue().getAspects().entrySet().forEach(aspectEntry -> { + try { + if (builderMethods.contains(aspectEntry.getKey())) { + String upperFirstAspect = toUpperFirst(aspectEntry.getKey()); + Class aspectClazz = REFLECT.lookupClass(upperFirstAspect); + Class aspectRespClazz = REFLECT.lookupClass(upperFirstAspect + ASPECT_RESPONSE_SUFFIX); + Class aspectRespClazzBuilder = REFLECT.lookupClass(String.join("", + upperFirstAspect, ASPECT_RESPONSE_SUFFIX, + "$", upperFirstAspect, ASPECT_RESPONSE_SUFFIX, "Builder")); + Object aspectBuilder = REFLECT.lookupMethod(aspectRespClazz, "builder").invoke(null); + + REFLECT.lookupMethod(aspectRespClazzBuilder, "value", aspectClazz).invoke(aspectBuilder, aspectEntry.getValue().getValue()); + + if (withSystemMetadata) { + REFLECT.lookupMethod(aspectRespClazzBuilder, "systemMetadata", SystemMetadata.class) + .invoke(aspectBuilder, aspectEntry.getValue().getSystemMetadata()); + } + + REFLECT.lookupMethod(builderPair, aspectEntry.getKey(), aspectRespClazz).invoke(builderPair.getSecond(), + REFLECT.lookupMethod(aspectRespClazzBuilder, "build").invoke(aspectBuilder)); + } + } catch (IllegalAccessException | InvocationTargetException e) { + throw new RuntimeException(e); + } + }); + + return toClazz.cast(REFLECT.lookupMethod(builderPair, "build").invoke(builderPair.getSecond())); + } catch (IllegalAccessException | InvocationTargetException e) { + throw new RuntimeException(e); + } + }).collect(Collectors.toList()); + } + return List.of(); + } + + public static T convertToResponseAspect(I source, Class targetClazz) { + if (source != null) { + try { + Class sourceClazz = REFLECT.lookupClass(source.getClass().getSimpleName()); + Method valueMethod = REFLECT.lookupMethod(sourceClazz, "getValue"); + Object aspect = valueMethod.invoke(source); + + Pair, Object> builderPair = REFLECT.getBuilder(targetClazz); + REFLECT.lookupMethod(builderPair, "value", valueMethod.getReturnType()).invoke(builderPair.getSecond(), aspect); + + return targetClazz.cast(REFLECT.lookupMethod(builderPair, "build").invoke(builderPair.getSecond())); + } catch (InvocationTargetException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + return null; + } + + public static T convertToResponse(I source, Class targetClazz, EntityRegistry entityRegistry) { + if (source != null) { + try { + Class sourceClazz = REFLECT.lookupClass(source.getClass().getSimpleName()); + Pair, Object> builderPair = REFLECT.getBuilder(targetClazz); + copy(Pair.of(sourceClazz, source), builderPair, "urn"); + + final EntitySpec entitySpec = requestClassToEntitySpec(entityRegistry, sourceClazz); + entitySpec.getAspectSpecs().stream() + .forEach(aspectSpec -> { + try { + copy(Pair.of(sourceClazz, source), builderPair, aspectSpec.getName()); + } catch (InvocationTargetException | IllegalAccessException e) { + throw new RuntimeException(e); + } + }); + + return targetClazz.cast(REFLECT.lookupMethod(builderPair, "build").invoke(builderPair.getSecond())); + } catch (InvocationTargetException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + return null; + } + + public static Optional convertToScrollResponse(Class scrollRespClazz, String scrollId, List 
entityResults) {
+    if (entityResults != null) {
+      try {
+        Pair<Class<?>, Object> builderPair = REFLECT.getBuilder(scrollRespClazz);
+        REFLECT.lookupMethod(builderPair.getFirst(), "scrollId", String.class).invoke(builderPair.getSecond(), scrollId);
+        REFLECT.lookupMethod(builderPair.getFirst(), "entities", List.class).invoke(builderPair.getSecond(), entityResults);
+
+        return Optional.of(scrollRespClazz.cast(REFLECT.lookupMethod(builderPair, "build").invoke(builderPair.getSecond())));
+
+      } catch (InvocationTargetException | IllegalAccessException e) {
+        throw new RuntimeException(e);
+      }
+    }
+    return Optional.empty();
+  }
+
+
+
+  private static void copy(Pair<Class<?>, Object> sourcePair, Pair<Class<?>, Object> builderPair, String method)
+      throws InvocationTargetException, IllegalAccessException {
+    Method sourceMethod = REFLECT.lookupMethod(sourcePair, String.format("get%s", toUpperFirst(method)));
+    if (sourceMethod != null) {
+      Class<?> paramClazz = null;
+      Object param = null;
+      if (sourceMethod.getReturnType().getSimpleName().contains("Request")) {
+        Object sourceParam = sourceMethod.invoke(sourcePair.getSecond());
+        if (sourceParam != null) {
+          paramClazz = REFLECT.lookupClass(sourceMethod.getReturnType().getSimpleName().replace("Request", "Response"));
+          Pair<Class<?>, Object> aspectBuilder = REFLECT.getBuilder(paramClazz);
+
+          for (Method m : sourceMethod.getReturnType().getMethods()) {
+            if (m.getName().startsWith("get") && !Objects.equals("getClass", m.getName())) {
+              String getterMethod = m.getName().replaceFirst("^get", "");
+              copy(Pair.of(sourceMethod.getReturnType(), sourceMethod.invoke(sourcePair.getSecond())),
+                  aspectBuilder, getterMethod);
+            }
+          }
+
+          param = REFLECT.lookupMethod(aspectBuilder, "build").invoke(aspectBuilder.getSecond());
+        }
+      } else {
+        paramClazz = sourceMethod.getReturnType();
+        param = sourceMethod.invoke(sourcePair.getSecond());
+      }
+
+      if (param != null) {
+        Method targetMethod = REFLECT.lookupMethod(builderPair, toLowerFirst(method), paramClazz);
+        targetMethod.invoke(builderPair.getSecond(), param);
+      }
+    } else {
+      log.info("Class {} doesn't contain method {}", sourcePair.getFirst(),
+          String.format("get%s", toUpperFirst(method)));
+    }
+  }
+
+  public static EntitySpec requestClassToEntitySpec(EntityRegistry entityRegistry, Class<?> reqClazz) {
+    final String entityType = toLowerFirst(reqClazz.getSimpleName().replace(ENTITY_REQUEST_SUFFIX, ""));
+    return entityRegistry.getEntitySpec(entityType);
+  }
+
+  public static EntitySpec responseClassToEntitySpec(EntityRegistry entityRegistry, Class<?> respClazz) {
+    String entityType = toLowerFirst(respClazz.getSimpleName().replace(ENTITY_RESPONSE_SUFFIX, ""));
+    return entityRegistry.getEntitySpec(entityType);
+  }
+}
diff --git a/metadata-service/openapi-entity-servlet/src/main/resources/JavaSpring/api.mustache b/metadata-service/openapi-entity-servlet/src/main/resources/JavaSpring/api.mustache
new file mode 100644
index 0000000000000..cbc5d9e1996ea
--- /dev/null
+++ b/metadata-service/openapi-entity-servlet/src/main/resources/JavaSpring/api.mustache
@@ -0,0 +1,172 @@
+/**
+ * NOTE: This class is auto generated by the swagger code generator program ({{{generatorVersion}}}).
+ * https://github.com/swagger-api/swagger-codegen
+ * Do not edit the class manually.
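
The delegate methods in EntityApiDelegateImpl all lean on one convention: the Java method name itself encodes which aspect is being operated on. `StackWalker` grabs the currently executing method's name, `methodNameToAspectName` strips the leading verb, and `toLowerFirst` yields the registry aspect name. A minimal, self-contained sketch of that convention (`AspectNameDemo` is a hypothetical class; the walker logic mirrors the code above):

```java
public class AspectNameDemo {
    private static final StackWalker WALKER = StackWalker.getInstance();

    static String toLowerFirst(String s) {
        return Character.toLowerCase(s.charAt(0)) + s.substring(1);
    }

    static String methodNameToAspectName(String methodName) {
        return toLowerFirst(methodName.replaceFirst("^(get|head|delete|create)", ""));
    }

    // The walk(...) callback sees its invoking method as the first frame, so calling
    // this from createGlobalTags() yields "createGlobalTags" -> "globalTags".
    static String createGlobalTags() {
        String methodName = WALKER.walk(frames -> frames
            .findFirst()
            .map(StackWalker.StackFrame::getMethodName)).get();
        return methodNameToAspectName(methodName);
    }

    public static void main(String[] args) {
        System.out.println(createGlobalTags()); // prints: globalTags
    }
}
```
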
+ */ +package {{package}}; + +{{#imports}}import {{import}}; +{{/imports}} +{{#jdk8-no-delegate}} +import com.fasterxml.jackson.databind.ObjectMapper; +{{/jdk8-no-delegate}} +{{#useOas2}} +import io.swagger.annotations.*; +{{/useOas2}} +{{^useOas2}} +import io.swagger.v3.oas.annotations.Parameter; +import io.swagger.v3.oas.annotations.enums.ParameterIn; +import io.swagger.v3.oas.annotations.responses.ApiResponses; +import io.swagger.v3.oas.annotations.responses.ApiResponse; +import io.swagger.v3.oas.annotations.media.ArraySchema; +import io.swagger.v3.oas.annotations.media.Content; +import io.swagger.v3.oas.annotations.media.Schema; +import io.swagger.v3.oas.annotations.security.SecurityRequirement; +{{/useOas2}} +{{#jdk8-no-delegate}} +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.http.HttpStatus; +{{/jdk8-no-delegate}} +import org.springframework.http.ResponseEntity; +{{#useBeanValidation}} +import org.springframework.validation.annotation.Validated; +{{/useBeanValidation}} +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestHeader; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestMethod; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.multipart.MultipartFile; +import org.springframework.web.bind.annotation.CookieValue; + +{{#jdk8-no-delegate}} +import javax.servlet.http.HttpServletRequest; +{{/jdk8-no-delegate}} +{{#useBeanValidation}} +import javax.validation.Valid; +import javax.validation.constraints.*; +{{/useBeanValidation}} +{{#jdk8-no-delegate}} +import java.io.IOException; +{{/jdk8-no-delegate}} +import java.util.List; +import java.util.Map; +{{#jdk8-no-delegate}} +import java.util.Optional; +{{/jdk8-no-delegate}} +{{^jdk8-no-delegate}} + {{#useOptional}} +import java.util.Optional; + {{/useOptional}} +{{/jdk8-no-delegate}} +{{#async}} +import java.util.concurrent.{{^isJava8or11}}Callable{{/isJava8or11}}{{#isJava8or11}}CompletableFuture{{/isJava8or11}}; +{{/async}} + +{{>generatedAnnotation}} +{{#useBeanValidation}} +@Validated +{{/useBeanValidation}} +{{#useOas2}} +@Api(value = "{{{baseName}}}", description = "the {{{baseName}}} API") +{{/useOas2}} +{{#operations}} +public interface {{classname}} { +{{#isJava8or11}} + + {{^isDelegate}} + Logger log = LoggerFactory.getLogger({{classname}}.class); + + {{#defaultInterfaces}}default {{/defaultInterfaces}}Optional getObjectMapper(){{^defaultInterfaces}};{{/defaultInterfaces}}{{#defaultInterfaces}}{ + return Optional.empty(); + }{{/defaultInterfaces}} + + {{#defaultInterfaces}}default {{/defaultInterfaces}}Optional getRequest(){{^defaultInterfaces}};{{/defaultInterfaces}}{{#defaultInterfaces}}{ + return Optional.empty(); + }{{/defaultInterfaces}} + + {{#defaultInterfaces}}default Optional getAcceptHeader() { + return getRequest().map(r -> r.getHeader("Accept")); + }{{/defaultInterfaces}} + {{/isDelegate}} + {{#isDelegate}} + {{classname}}Delegate getDelegate(); + {{/isDelegate}} +{{/isJava8or11}} +{{#operation}} +{{#contents}} +{{#@first}} + + {{#useOas2}} + @ApiOperation(value = "{{{summary}}}", nickname = "{{{operationId}}}", notes = "{{{notes}}}"{{#returnBaseType}}, response = {{{returnBaseType}}}.class{{/returnBaseType}}{{#returnContainer}}, responseContainer = 
"{{{returnContainer}}}"{{/returnContainer}}{{#hasAuthMethods}}, authorizations = { + {{#authMethods}}@Authorization(value = "{{name}}"{{#isOAuth}}, scopes = { {{#each scopes}} + @AuthorizationScope(scope = "{{@key}}", description = "{{this}}"){{^@last}},{{/@last}}{{/each}} + }{{/isOAuth}}){{#hasMore}}, + {{/hasMore}}{{/authMethods}} + }{{/hasAuthMethods}}, tags={ {{#vendorExtensions.x-tags}}"{{tag}}",{{/vendorExtensions.x-tags}} }) + @ApiResponses(value = { {{#responses}} + @ApiResponse(code = {{{code}}}, message = "{{{message}}}"{{#baseType}}, response = {{{baseType}}}.class{{/baseType}}{{#containerType}}, responseContainer = "{{{containerType}}}"{{/containerType}}){{#hasMore}},{{/hasMore}}{{/responses}} }) + {{#implicitHeaders}} + @ApiImplicitParams({ + {{#headerParams}} + {{>implicitHeader}} + {{/headerParams}} + }) + {{/implicitHeaders}} + {{/useOas2}} + {{^useOas2}} + @io.swagger.v3.oas.annotations.Operation(summary = "{{{summary}}}", description = "{{{notes}}}"{{#hasAuthMethods}}, security = { + {{#authMethods}}@SecurityRequirement(name = "{{name}}"{{#isOAuth}}, scopes = { + {{#each scopes}}"{{@key}}"{{^@last}}, + {{/@last}}{{/each}} + }{{/isOAuth}}){{#hasMore}}, + {{/hasMore}}{{/authMethods}} + }{{/hasAuthMethods}}, tags={ {{#vendorExtensions.x-tags}}"{{tag}}"{{#hasMore}}, {{/hasMore}}{{/vendorExtensions.x-tags}} }) + @ApiResponses(value = { {{#responses}} + @ApiResponse(responseCode = "{{{code}}}", description = "{{{message}}}"{{^vendorExtensions.x-java-is-response-void}}{{#baseType}}, content = @Content({{#schema.extensions.x-content-type}}mediaType = "{{schema.extensions.x-content-type}}", {{/schema.extensions.x-content-type}}{{^containerType}}schema = @Schema(implementation = {{{baseType}}}.class)){{/containerType}}{{#containerType}}array = @ArraySchema(schema = @Schema(implementation = {{{baseType}}}.class))){{/containerType}}{{/baseType}}{{/vendorExtensions.x-java-is-response-void}}){{#hasMore}}, + {{/hasMore}}{{/responses}} }) + {{/useOas2}} + @RequestMapping(value = "{{{path}}}",{{#singleContentTypes}}{{#hasProduces}} + produces = "{{{vendorExtensions.x-accepts}}}", {{/hasProduces}}{{#hasConsumes}} + consumes = "{{{vendorExtensions.x-contentType}}}",{{/hasConsumes}}{{/singleContentTypes}}{{^singleContentTypes}}{{#hasProduces}} + produces = { {{#produces}}"{{{mediaType}}}"{{#hasMore}}, {{/hasMore}}{{/produces}} }, {{/hasProduces}}{{#hasConsumes}} + consumes = { {{#consumes}}"{{{mediaType}}}"{{#hasMore}}, {{/hasMore}}{{/consumes}} }, {{/hasConsumes}}{{/singleContentTypes}} + method = RequestMethod.{{httpMethod}}) + {{#defaultInterfaces}}default {{/defaultInterfaces}}{{#responseWrapper}}{{.}}<{{/responseWrapper}}ResponseEntity<{{>returnTypes}}>{{#responseWrapper}}>{{/responseWrapper}} {{#delegate-method}}_{{/delegate-method}}{{operationId}}({{#parameters}}{{>queryParams}}{{>pathParams}}{{>headerParams}}{{>cookieParams}}{{>bodyParams}}{{>formParams}}{{#hasMore}}, {{/hasMore}}{{/parameters}}){{^defaultInterfaces}}{{#throwsException}} throws Exception{{/throwsException}};{{/defaultInterfaces}}{{#defaultInterfaces}}{{#throwsException}} throws Exception{{/throwsException}} { + {{#delegate-method}} + return {{operationId}}({{#parameters}}{{paramName}}{{#hasMore}}, {{/hasMore}}{{/parameters}}); + } + + // Override this method + default {{#responseWrapper}}{{.}}<{{/responseWrapper}}ResponseEntity<{{>returnTypes}}>{{#responseWrapper}}>{{/responseWrapper}} {{operationId}}({{#parameters}}{{^isBinary}}{{{dataType}}}{{/isBinary}}{{#isBinary}}MultipartFile{{/isBinary}} 
{{paramName}}{{#hasMore}},{{/hasMore}}{{/parameters}}) { + {{/delegate-method}} + {{^isDelegate}} + if(getObjectMapper().isPresent() && getAcceptHeader().isPresent()) { + {{#examples}} + if (getAcceptHeader().get().contains("{{{contentType}}}")) { + try { + return {{#async}}CompletableFuture.completedFuture({{/async}}new ResponseEntity<>(getObjectMapper().get().readValue("{{#lambdaRemoveLineBreak}}{{#lambdaEscapeDoubleQuote}}{{{example}}}{{/lambdaEscapeDoubleQuote}}{{/lambdaRemoveLineBreak}}", {{>exampleReturnTypes}}.class), HttpStatus.NOT_IMPLEMENTED){{#async}}){{/async}}; + } catch (IOException e) { + log.error("Couldn't serialize response for content type {{{contentType}}}", e); + return {{#async}}CompletableFuture.completedFuture({{/async}}new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR){{#async}}){{/async}}; + } + } + {{/examples}} + } else { + log.warn("ObjectMapper or HttpServletRequest not configured in default {{classname}} interface so no example is generated"); + } + return {{#async}}CompletableFuture.completedFuture({{/async}}new ResponseEntity<>(HttpStatus.NOT_IMPLEMENTED){{#async}}){{/async}}; + {{/isDelegate}} + {{#isDelegate}} + return getDelegate().{{operationId}}({{#parameters}}{{paramName}}{{#hasMore}}, {{/hasMore}}{{/parameters}}); + {{/isDelegate}} + }{{/defaultInterfaces}} + +{{/@first}} +{{/contents}} +{{/operation}} +} +{{/operations}} diff --git a/metadata-service/openapi-entity-servlet/src/main/resources/JavaSpring/apiController.mustache b/metadata-service/openapi-entity-servlet/src/main/resources/JavaSpring/apiController.mustache new file mode 100644 index 0000000000000..9499eba3f4b22 --- /dev/null +++ b/metadata-service/openapi-entity-servlet/src/main/resources/JavaSpring/apiController.mustache @@ -0,0 +1,127 @@ +package {{package}}; + +import io.datahubproject.openapi.delegates.EntityApiDelegateImpl; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.search.SearchService; +import io.datahubproject.openapi.entities.EntitiesController; +import com.datahub.authorization.AuthorizerChain; + +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.beans.factory.annotation.Value; + +{{#imports}}import {{import}}; +{{/imports}} + +{{^isDelegate}} +import com.fasterxml.jackson.databind.ObjectMapper; +{{/isDelegate}} + +{{#useOas2}} +import io.swagger.annotations.*; +{{/useOas2}} +{{^useOas2}} +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.Parameter; +import io.swagger.v3.oas.annotations.enums.ParameterIn; +import io.swagger.v3.oas.annotations.responses.ApiResponses; +import io.swagger.v3.oas.annotations.responses.ApiResponse; +import io.swagger.v3.oas.annotations.media.ArraySchema; +import io.swagger.v3.oas.annotations.media.Content; +import io.swagger.v3.oas.annotations.media.Schema; +import io.swagger.v3.oas.annotations.security.SecurityRequirement; +{{/useOas2}} +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; + +import org.springframework.web.bind.annotation.CookieValue; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestHeader; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RequestPart; +{{^useOas2}} 
+{{/useOas2}} +import org.springframework.web.multipart.MultipartFile; + + {{#useBeanValidation}} +import javax.validation.constraints.*; +import javax.validation.Valid; + {{/useBeanValidation}} + +{{^isDelegate}} +import javax.servlet.http.HttpServletRequest; + {{#isJava8or11}} +import java.util.Optional; + {{/isJava8or11}} +{{/isDelegate}} +{{^jdk8-no-delegate}} + {{#useOptional}} +import java.util.Optional; + {{/useOptional}} +{{/jdk8-no-delegate}} + + {{^isDelegate}} +import java.io.IOException; + {{/isDelegate}} +import java.util.List; +import java.util.Map; + {{#async}} +import java.util.concurrent.Callable; + {{/async}} + +{{>generatedAnnotation}} +{{#useOas2}} +{{/useOas2}} +{{^useOas2}} +{{/useOas2}} +{{#operations}} +@RestController +@RequestMapping("/v2/entity") +public class {{classname}}Controller implements {{classname}} { + + private static final Logger log = LoggerFactory.getLogger({{classname}}Controller.class); + + private final ObjectMapper objectMapper; + + private final HttpServletRequest request; + + private final EntityApiDelegateImpl<{{requestClass}}, {{responseClass}}, {{scrollResponseClass}}> delegate; + + @org.springframework.beans.factory.annotation.Autowired + public {{classname}}Controller(ObjectMapper objectMapper, HttpServletRequest request, EntityService entityService, + SearchService searchService, EntitiesController v1Controller, AuthorizerChain authorizationChain, + @Value("${authorization.restApiAuthorization:false}") boolean restApiAuthorizationEnabled) { + this.objectMapper = objectMapper; + this.request = request; + this.delegate = new EntityApiDelegateImpl<{{requestClass}}, {{responseClass}}, {{scrollResponseClass}}>(entityService, searchService, v1Controller, + restApiAuthorizationEnabled, authorizationChain, {{requestClass}}.class, {{responseClass}}.class, {{scrollResponseClass}}.class); + } + {{#isJava8or11}} + + @Override + public Optional getObjectMapper() { + return Optional.ofNullable(objectMapper); + } + + @Override + public Optional getRequest() { + return Optional.ofNullable(request); + } + {{/isJava8or11}} + +{{#operation}} +{{#contents}} +{{#@first}} + @Override + public ResponseEntity<{{>returnTypes}}>{{#async}}>{{/async}} {{operationId}}({{#parameters}}{{>queryParams}}{{>pathParams}}{{>headerParams}}{{>bodyParams}}{{>formParams}}{{>cookieParams}}{{#hasMore}},{{/hasMore}}{{/parameters}}) { + return delegate.{{operationId}}({{#allParams}}{{paramName}}{{#hasMore}}, {{/hasMore}}{{/allParams}}); + } + +{{/@first}} +{{/contents}} +{{/operation}} +} +{{/operations}} \ No newline at end of file diff --git a/metadata-service/openapi-entity-servlet/src/main/resources/JavaSpring/readme.txt b/metadata-service/openapi-entity-servlet/src/main/resources/JavaSpring/readme.txt new file mode 100644 index 0000000000000..f127cd3e68553 --- /dev/null +++ b/metadata-service/openapi-entity-servlet/src/main/resources/JavaSpring/readme.txt @@ -0,0 +1,2 @@ +Original: +https://github.com/swagger-api/swagger-codegen-generators/tree/master/src/main/resources/handlebars/JavaSpring \ No newline at end of file diff --git a/metadata-service/openapi-entity-servlet/src/main/resources/entity-v2.0.yml b/metadata-service/openapi-entity-servlet/src/main/resources/entity-v2.0.yml new file mode 100644 index 0000000000000..bebbf05e9334b --- /dev/null +++ b/metadata-service/openapi-entity-servlet/src/main/resources/entity-v2.0.yml @@ -0,0 +1,6 @@ + +openapi: "3.0.0" +info: + title: Entity API + description: This is a service for DataHub Entities. 
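
Expanding apiController.mustache for a concrete entity gives a feel for the generated code. A condensed, hypothetical expansion for the Dataset entity (imports, the ObjectMapper/HttpServletRequest plumbing, and most operations are elided; the `StatusAspectResponseV2` return type is assumed from the delegate above):

```java
@RestController
@RequestMapping("/v2/entity")
public class DatasetApiController implements DatasetApi {

    private final EntityApiDelegateImpl<DatasetEntityRequestV2, DatasetEntityResponseV2,
        ScrollDatasetEntityResponseV2> delegate;

    public DatasetApiController(EntityService entityService, SearchService searchService,
            EntitiesController v1Controller, AuthorizerChain authorizationChain,
            @Value("${authorization.restApiAuthorization:false}") boolean restApiAuthorizationEnabled) {
        this.delegate = new EntityApiDelegateImpl<>(entityService, searchService, v1Controller,
            restApiAuthorizationEnabled, authorizationChain,
            DatasetEntityRequestV2.class, DatasetEntityResponseV2.class,
            ScrollDatasetEntityResponseV2.class);
    }

    // Every generated operation body is a one-line forward to the shared delegate:
    @Override
    public ResponseEntity<StatusAspectResponseV2> getStatus(String urn, Boolean systemMetadata) {
        return delegate.getStatus(urn, systemMetadata);
    }
}
```
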
+ version: v2 diff --git a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java new file mode 100644 index 0000000000000..b7e255b8c270e --- /dev/null +++ b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java @@ -0,0 +1,119 @@ +package io.datahubproject.openapi.config; + +import com.datahub.authentication.Actor; +import com.datahub.authentication.ActorType; +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationContext; +import com.datahub.authorization.AuthorizationResult; +import com.datahub.authorization.AuthorizerChain; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.EntityServiceImpl; +import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistryException; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.search.SearchService; +import com.linkedin.metadata.systemmetadata.SystemMetadataService; +import com.linkedin.metadata.timeline.TimelineService; +import io.datahubproject.openapi.dto.UrnResponseMap; +import io.datahubproject.openapi.entities.EntitiesController; +import io.datahubproject.openapi.generated.EntityResponse; +import io.datahubproject.openapi.relationships.RelationshipsController; +import io.datahubproject.openapi.timeline.TimelineController; +import org.mockito.Mockito; +import org.springframework.boot.test.context.TestConfiguration; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Primary; +import org.springframework.http.ResponseEntity; + +import java.util.Arrays; +import java.util.Map; +import java.util.stream.Collectors; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + + +@TestConfiguration +public class OpenAPIEntityTestConfiguration { + @Bean + public ObjectMapper objectMapper() { + return new ObjectMapper(new YAMLFactory()); + } + + @Bean + @Primary + public EntityService entityService(final EntityRegistry mockRegistry) { + EntityService entityService = mock(EntityServiceImpl.class); + when(entityService.getEntityRegistry()).thenReturn(mockRegistry); + return entityService; + } + + @Bean + @Primary + public SearchService searchService() { + SearchService searchService = mock(SearchService.class); + when(searchService.scrollAcrossEntities(anyList(), any(), any(), any(), + any(), any(), anyInt(), any())) + .thenReturn(new ScrollResult().setEntities(new SearchEntityArray())); + + return searchService; + } + + @Bean + public AuthorizerChain authorizerChain() { + AuthorizerChain authorizerChain = Mockito.mock(AuthorizerChain.class); + + Authentication authentication = Mockito.mock(Authentication.class); + when(authentication.getActor()).thenReturn(new Actor(ActorType.USER, "datahub")); + 
when(authorizerChain.authorize(any())).thenReturn(new AuthorizationResult(null, AuthorizationResult.Type.ALLOW, "")); + AuthenticationContext.setAuthentication(authentication); + + return authorizerChain; + } + + @MockBean(name = "elasticSearchSystemMetadataService") + public SystemMetadataService systemMetadataService; + + @MockBean + public TimelineService timelineService; + + @Bean("entityRegistry") + @Primary + public ConfigEntityRegistry configEntityRegistry() throws EntityRegistryException { + return new ConfigEntityRegistry( + OpenAPIEntityTestConfiguration.class.getClassLoader().getResourceAsStream("entity-registry.yml")); + } + + /* Controllers not under this module */ + @Bean + @Primary + public EntitiesController entitiesController() { + EntitiesController entitiesController = mock(EntitiesController.class); + when(entitiesController.getEntities(any(), any())) + .thenAnswer(params -> { + String[] urns = params.getArgument(0); + String[] aspects = params.getArgument(1); + return ResponseEntity.ok(UrnResponseMap.builder() + .responses(Arrays.stream(urns) + .map(urn -> Map.entry(urn, EntityResponse.builder().urn(urn).build())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))) + .build()); + }); + + return entitiesController; + } + + @MockBean + public TimelineController timelineController; + + @MockBean + public RelationshipsController relationshipsController; +} diff --git a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/delegates/EntityApiDelegateImplTest.java b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/delegates/EntityApiDelegateImplTest.java new file mode 100644 index 0000000000000..fc2aae1a75ab8 --- /dev/null +++ b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/delegates/EntityApiDelegateImplTest.java @@ -0,0 +1,203 @@ +package io.datahubproject.openapi.delegates; + +import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import io.datahubproject.openapi.config.OpenAPIEntityTestConfiguration; +import io.datahubproject.openapi.config.SpringWebConfig; +import io.datahubproject.openapi.generated.BrowsePathEntry; +import io.datahubproject.openapi.generated.BrowsePathsV2; +import io.datahubproject.openapi.generated.BrowsePathsV2AspectRequestV2; +import io.datahubproject.openapi.generated.ChartEntityRequestV2; +import io.datahubproject.openapi.generated.ChartEntityResponseV2; +import io.datahubproject.openapi.generated.DatasetEntityRequestV2; +import io.datahubproject.openapi.generated.DatasetEntityResponseV2; +import io.datahubproject.openapi.generated.Deprecation; +import io.datahubproject.openapi.generated.DeprecationAspectRequestV2; +import io.datahubproject.openapi.generated.Domains; +import io.datahubproject.openapi.generated.DomainsAspectRequestV2; +import io.datahubproject.openapi.generated.GlobalTags; +import io.datahubproject.openapi.generated.GlobalTagsAspectRequestV2; +import io.datahubproject.openapi.generated.GlossaryTermAssociation; +import io.datahubproject.openapi.generated.GlossaryTerms; +import io.datahubproject.openapi.generated.GlossaryTermsAspectRequestV2; +import io.datahubproject.openapi.generated.Owner; +import io.datahubproject.openapi.generated.Ownership; +import io.datahubproject.openapi.generated.OwnershipAspectRequestV2; +import io.datahubproject.openapi.generated.OwnershipType; +import io.datahubproject.openapi.generated.ScrollChartEntityResponseV2; +import 
io.datahubproject.openapi.generated.ScrollDatasetEntityResponseV2; +import io.datahubproject.openapi.generated.Status; +import io.datahubproject.openapi.generated.StatusAspectRequestV2; +import io.datahubproject.openapi.generated.TagAssociation; +import io.datahubproject.openapi.generated.controller.ChartApiController; +import io.datahubproject.openapi.generated.controller.DatasetApiController; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.context.annotation.ComponentScan; +import org.springframework.context.annotation.Import; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import java.util.List; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; + + +@SpringBootTest(classes = {SpringWebConfig.class}) +@ComponentScan(basePackages = {"io.datahubproject.openapi.generated.controller"}) +@Import({OpenAPIEntityTestConfiguration.class}) +public class EntityApiDelegateImplTest extends AbstractTestNGSpringContextTests { + @BeforeTest + public void disableAssert() { + PathSpecBasedSchemaAnnotationVisitor.class.getClassLoader() + .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); + } + + @Autowired + private ChartApiController chartApiController; + @Autowired + private DatasetApiController datasetApiController; + + @Test + public void initTest() { + assertNotNull(chartApiController); + assertNotNull(datasetApiController); + } + + @Test + public void chartApiControllerTest() { + final String testUrn = "urn:li:chart:(looker,baz1)"; + + ChartEntityRequestV2 req = ChartEntityRequestV2.builder() + .urn(testUrn) + .build(); + ChartEntityResponseV2 resp = chartApiController.create(List.of(req)).getBody().get(0); + assertEquals(resp.getUrn(), testUrn); + + resp = chartApiController.get(testUrn, false, List.of()).getBody(); + assertEquals(resp.getUrn(), testUrn); + + ResponseEntity deleteResp = chartApiController.delete(testUrn); + assertEquals(deleteResp.getStatusCode(), HttpStatus.OK); + + ResponseEntity headResp = chartApiController.head(testUrn); + assertEquals(headResp.getStatusCode(), HttpStatus.NOT_FOUND); + + ResponseEntity scrollResp = chartApiController.scroll( + false, List.of(), 10, null, null, null, null); + assertEquals(scrollResp.getStatusCode(), HttpStatus.OK); + assertNotNull(scrollResp.getBody().getEntities()); + } + + @Test + public void datasetApiControllerTest() { + final String testUrn = "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)"; + + DatasetEntityRequestV2 req = DatasetEntityRequestV2.builder() + .urn(testUrn) + .build(); + DatasetEntityResponseV2 resp = datasetApiController.create(List.of(req)).getBody().get(0); + assertEquals(resp.getUrn(), testUrn); + + resp = datasetApiController.get(testUrn, false, List.of()).getBody(); + assertEquals(resp.getUrn(), testUrn); + + ResponseEntity deleteResp = datasetApiController.delete(testUrn); + assertEquals(deleteResp.getStatusCode(), HttpStatus.OK); + + ResponseEntity headResp = datasetApiController.head(testUrn); + assertEquals(headResp.getStatusCode(), HttpStatus.NOT_FOUND); + + ResponseEntity scrollResp = datasetApiController.scroll( + false, List.of(), 10, null, null, null, null); + 
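
The positional arguments in these `scroll(...)` calls map to `(systemMetadata, aspects, count, scrollId, sort, sortOrder, query)`, matching the delegate signature above. Outside of tests, the scroll id threads pages together as a cursor. A sketch of the intended pagination loop (the `getScrollId()` accessor is assumed from the generated builder, and a null cursor is assumed to mark the last page):

```java
String scrollId = null;
do {
    ResponseEntity<ScrollDatasetEntityResponseV2> page = datasetApiController.scroll(
        false,      // systemMetadata: omit system metadata from results
        List.of(),  // aspects: requested aspects (de-duplicated and forwarded by the delegate)
        10,         // count: page size, must be >= 1
        scrollId,   // scrollId: null on the first call, cursor afterwards
        null, null, // sort fields and sort order; the delegate defaults to "urn" ASCENDING
        null);      // query
    if (page.getBody() == null) {
        break;      // ResponseEntity.of(Optional.empty()) yields a body-less 404
    }
    // ... consume page.getBody().getEntities() ...
    scrollId = page.getBody().getScrollId(); // assumed accessor; the builder sets "scrollId"
} while (scrollId != null);
```
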
assertEquals(scrollResp.getStatusCode(), HttpStatus.OK); + assertNotNull(scrollResp.getBody().getEntities()); + } + + @Test + public void browsePathsTest() { + final String testUrn = "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)"; + + BrowsePathsV2AspectRequestV2 req = BrowsePathsV2AspectRequestV2.builder() + .value(BrowsePathsV2.builder().path(List.of(BrowsePathEntry.builder().urn(testUrn) + .id("path").build())).build()).build(); + assertEquals(datasetApiController.createBrowsePathsV2(testUrn, req).getStatusCode(), HttpStatus.OK); + assertEquals(datasetApiController.deleteBrowsePathsV2(testUrn).getStatusCode(), HttpStatus.OK); + assertEquals(datasetApiController.getBrowsePathsV2(testUrn, false).getStatusCode(), HttpStatus.NOT_FOUND); + assertEquals(datasetApiController.headBrowsePathsV2(testUrn).getStatusCode(), HttpStatus.NOT_FOUND); + } + + @Test + public void deprecationTest() { + final String testUrn = "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)"; + + DeprecationAspectRequestV2 req = DeprecationAspectRequestV2.builder() + .value(Deprecation.builder().deprecated(true).build()).build(); + assertEquals(datasetApiController.createDeprecation(testUrn, req).getStatusCode(), HttpStatus.OK); + assertEquals(datasetApiController.deleteDeprecation(testUrn).getStatusCode(), HttpStatus.OK); + assertEquals(datasetApiController.getDeprecation(testUrn, false).getStatusCode(), HttpStatus.NOT_FOUND); + assertEquals(datasetApiController.headDeprecation(testUrn).getStatusCode(), HttpStatus.NOT_FOUND); + } + + @Test + public void domainsTest() { + final String testUrn = "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)"; + + DomainsAspectRequestV2 req = DomainsAspectRequestV2.builder() + .value(Domains.builder().domains(List.of("my_domain")).build()).build(); + assertEquals(datasetApiController.createDomains(testUrn, req).getStatusCode(), HttpStatus.OK); + assertEquals(datasetApiController.deleteDomains(testUrn).getStatusCode(), HttpStatus.OK); + assertEquals(datasetApiController.getDomains(testUrn, false).getStatusCode(), HttpStatus.NOT_FOUND); + assertEquals(datasetApiController.headDomains(testUrn).getStatusCode(), HttpStatus.NOT_FOUND); + } + + @Test + public void ownershipTest() { + final String testUrn = "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)"; + + OwnershipAspectRequestV2 req = OwnershipAspectRequestV2.builder() + .value(Ownership.builder().owners(List.of(Owner.builder().owner("me").type(OwnershipType.BUSINESS_OWNER).build())).build()).build(); + assertEquals(datasetApiController.createOwnership(testUrn, req).getStatusCode(), HttpStatus.OK); + assertEquals(datasetApiController.deleteOwnership(testUrn).getStatusCode(), HttpStatus.OK); + assertEquals(datasetApiController.getOwnership(testUrn, false).getStatusCode(), HttpStatus.NOT_FOUND); + assertEquals(datasetApiController.headOwnership(testUrn).getStatusCode(), HttpStatus.NOT_FOUND); + } + + @Test + public void statusTest() { + final String testUrn = "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)"; + + StatusAspectRequestV2 req = StatusAspectRequestV2.builder().value(Status.builder().removed(true).build()).build(); + assertEquals(datasetApiController.createStatus(testUrn, req).getStatusCode(), HttpStatus.OK); + assertEquals(datasetApiController.deleteStatus(testUrn).getStatusCode(), HttpStatus.OK); + assertEquals(datasetApiController.getStatus(testUrn, false).getStatusCode(), HttpStatus.NOT_FOUND); + 
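
The assertion pattern repeated in these tests is worth spelling out: create and delete appear to come back OK because the write path runs against the mocked EntityService, while get and head come back NOT_FOUND because the mocked v1 EntitiesController in OpenAPIEntityTestConfiguration answers every lookup with an EntityResponse that carries a urn but no aspects. Condensed, that stub behaves like:

```java
// Every requested urn resolves to an aspect-less EntityResponse,
// so aspect-level GET/HEAD lookups find nothing and return 404.
when(entitiesController.getEntities(any(), any())).thenAnswer(params -> {
    String[] urns = params.getArgument(0);
    return ResponseEntity.ok(UrnResponseMap.builder()
        .responses(Arrays.stream(urns)
            .map(urn -> Map.entry(urn, EntityResponse.builder().urn(urn).build()))
            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)))
        .build());
});
```
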
assertEquals(datasetApiController.headStatus(testUrn).getStatusCode(), HttpStatus.NOT_FOUND); + } + + @Test + public void globalTagsTest() { + final String testUrn = "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)"; + + GlobalTagsAspectRequestV2 req = GlobalTagsAspectRequestV2.builder() + .value(GlobalTags.builder().tags(List.of(TagAssociation.builder().tag("tag").build())).build()).build(); + assertEquals(datasetApiController.createGlobalTags(testUrn, req).getStatusCode(), HttpStatus.OK); + assertEquals(datasetApiController.deleteGlobalTags(testUrn).getStatusCode(), HttpStatus.OK); + assertEquals(datasetApiController.getGlobalTags(testUrn, false).getStatusCode(), HttpStatus.NOT_FOUND); + assertEquals(datasetApiController.headGlobalTags(testUrn).getStatusCode(), HttpStatus.NOT_FOUND); + } + + @Test + public void glossaryTermsTest() { + final String testUrn = "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)"; + + GlossaryTermsAspectRequestV2 req = GlossaryTermsAspectRequestV2.builder() + .value(GlossaryTerms.builder().terms(List.of(GlossaryTermAssociation.builder().urn("term urn").build())).build()).build(); + assertEquals(datasetApiController.createGlossaryTerms(testUrn, req).getStatusCode(), HttpStatus.OK); + assertEquals(datasetApiController.deleteGlossaryTerms(testUrn).getStatusCode(), HttpStatus.OK); + assertEquals(datasetApiController.getGlossaryTerms(testUrn, false).getStatusCode(), HttpStatus.NOT_FOUND); + assertEquals(datasetApiController.headGlossaryTerms(testUrn).getStatusCode(), HttpStatus.NOT_FOUND); + } +} diff --git a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/util/OpenApiEntitiesUtilTest.java b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/util/OpenApiEntitiesUtilTest.java new file mode 100644 index 0000000000000..8f87b041a7e03 --- /dev/null +++ b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/util/OpenApiEntitiesUtilTest.java @@ -0,0 +1,55 @@ +package io.datahubproject.openapi.util; + +import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.models.registry.EntityRegistry; +import io.datahubproject.openapi.config.OpenAPIEntityTestConfiguration; +import io.datahubproject.openapi.dto.UpsertAspectRequest; +import io.datahubproject.openapi.generated.ContainerEntityRequestV2; +import io.datahubproject.openapi.generated.ContainerKey; +import io.datahubproject.openapi.generated.ContainerKeyAspectRequestV2; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; +import org.springframework.context.annotation.PropertySource; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import java.util.List; + +import static org.junit.Assert.assertNotNull; +import static org.testng.AssertJUnit.assertEquals; + + +@Import({OpenAPIEntityTestConfiguration.class}) +@PropertySource(value = "classpath:/application.yml", factory = YamlPropertySourceFactory.class) +public class OpenApiEntitiesUtilTest extends AbstractTestNGSpringContextTests { + @Autowired + private EntityRegistry entityRegistry; + + @BeforeTest + public void disableAssert() { + PathSpecBasedSchemaAnnotationVisitor.class.getClassLoader() + 
.setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); + } + + @Test + public void testInitialization() { + assertNotNull(entityRegistry); + } + + @Test + public void containerConversionTest() { + ContainerEntityRequestV2 test = ContainerEntityRequestV2.builder() + .urn("urn:li:container:123") + .containerKey(ContainerKeyAspectRequestV2.builder().value(ContainerKey.builder().guid("123").build()).build()) + .build(); + List expected = List.of(UpsertAspectRequest.builder() + .entityType("container") + .entityUrn("urn:li:container:123") + .aspect(ContainerKey.builder().guid("123").build()) + .build()); + + assertEquals(expected, OpenApiEntitiesUtil.convertEntityToUpsert(test, ContainerEntityRequestV2.class, entityRegistry)); + } +} diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java index 6148149ca6da4..9feb9c8e5640f 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/SpringWebConfig.java @@ -2,6 +2,7 @@ import io.datahubproject.openapi.converter.StringToChangeCategoryConverter; import io.swagger.v3.oas.annotations.OpenAPIDefinition; +import io.swagger.v3.oas.annotations.info.Info; import io.swagger.v3.oas.annotations.servers.Server; import java.util.List; import org.springframework.context.annotation.Configuration; @@ -16,7 +17,8 @@ @EnableWebMvc -@OpenAPIDefinition(servers = {@Server(url = "/openapi/", description = "Default Server URL")}) +@OpenAPIDefinition(info = @Info(title = "DataHub OpenAPI", version = "2.0.0"), + servers = {@Server(url = "/openapi/", description = "Default Server URL")}) @Configuration public class SpringWebConfig implements WebMvcConfigurer { diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java index 64d815af91aef..6439e2f31f7b0 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java @@ -52,7 +52,7 @@ import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; -import static com.linkedin.metadata.utils.PegasusUtils.*; +import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName; @RestController @@ -201,7 +201,7 @@ public ResponseEntity> deleteEntities( .map(proposal -> MappingUtil.ingestProposal(proposal, actorUrnStr, _entityService)) .filter(Pair::getSecond) .map(Pair::getFirst) - .map(urnString -> new AspectRowSummary().urn(urnString)) + .map(urnString -> AspectRowSummary.builder().urn(urnString).build()) .collect(Collectors.toList())) .rowsDeletedFromEntityDeletion(deleteRequests.size()) .build())); diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java index 4d0e5e7df29d5..68a8c8ca49235 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java +++ 
b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java @@ -7,13 +7,16 @@ import com.datahub.authorization.DisjunctivePrivilegeGroup; import com.datahub.authorization.ResourceSpec; import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import com.linkedin.avro2pegasus.events.KafkaAuditHeader; import com.linkedin.avro2pegasus.events.UUID; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.ByteString; +import com.linkedin.data.DataList; import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.Aspect; @@ -33,27 +36,33 @@ import com.linkedin.util.Pair; import io.datahubproject.openapi.dto.RollbackRunResultDto; import io.datahubproject.openapi.dto.UpsertAspectRequest; -import io.datahubproject.openapi.generated.AspectRowSummary; -import io.datahubproject.openapi.generated.AspectType; -import io.datahubproject.openapi.generated.AuditStamp; -import io.datahubproject.openapi.generated.EntityResponse; -import io.datahubproject.openapi.generated.EnvelopedAspect; -import io.datahubproject.openapi.generated.MetadataChangeProposal; -import io.datahubproject.openapi.generated.OneOfEnvelopedAspectValue; -import io.datahubproject.openapi.generated.OneOfGenericAspectValue; -import io.datahubproject.openapi.generated.Status; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Optional; import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import io.datahubproject.openapi.generated.AspectRowSummary; +import io.datahubproject.openapi.generated.AspectType; +import io.datahubproject.openapi.generated.AuditStamp; +import io.datahubproject.openapi.generated.EntityResponse; +import io.datahubproject.openapi.generated.EnvelopedAspect; +import io.datahubproject.openapi.generated.MetadataChangeProposal; +import io.datahubproject.openapi.generated.OneOfEnvelopedAspectValue; +import io.datahubproject.openapi.generated.OneOfGenericAspectValue; +import io.datahubproject.openapi.generated.Status; import lombok.extern.slf4j.Slf4j; +import org.apache.avro.Schema; import org.reflections.Reflections; import org.reflections.scanners.SubTypesScanner; import org.springframework.beans.factory.config.BeanDefinition; @@ -63,8 +72,9 @@ import org.springframework.http.MediaType; import org.springframework.web.client.HttpClientErrorException; -import static com.linkedin.metadata.Constants.*; -import static java.nio.charset.StandardCharsets.*; +import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; +import static io.datahubproject.openapi.util.ReflectionCache.toUpperFirst; +import static java.nio.charset.StandardCharsets.UTF_8; @Slf4j public class MappingUtil { @@ -72,24 +82,19 @@ private MappingUtil() { } + private static final JsonNodeFactory NODE_FACTORY = JsonNodeFactory.instance; private static final Map> ENVELOPED_ASPECT_TYPE_MAP = new HashMap<>(); private static final Map, String> 
ASPECT_NAME_MAP = new HashMap<>(); private static final Map> PEGASUS_TYPE_MAP = new HashMap<>(); - private static final Pattern CLASS_NAME_PATTERN = - Pattern.compile("(\"com\\.linkedin\\.)([a-z]+?\\.)+?(?[A-Z]\\w+?)(\":\\{)(?.*?)(}})"); - private static final Pattern GLOBAL_TAGS_PATTERN = - Pattern.compile("\"globalTags\":\\{"); - private static final Pattern GLOSSARY_TERMS_PATTERN = - Pattern.compile("\"glossaryTerms\":\\{"); private static final String DISCRIMINATOR = "__type"; - private static final Pattern CLASS_TYPE_NAME_PATTERN = - Pattern.compile("(\\s+?\"__type\"\\s+?:\\s+?\")(?\\w*?)(\"[,]?\\s+?)(?[\\S\\s]*?)(\\s+})"); private static final String PEGASUS_PACKAGE = "com.linkedin"; - private static final String GLOBAL_TAGS = "GlobalTags"; - private static final String GLOSSARY_TERMS = "GlossaryTerms"; + private static final ReflectionCache REFLECT_AVRO = ReflectionCache.builder() + .basePackage("com.linkedin.pegasus2avro").build(); + private static final ReflectionCache REFLECT_OPENAPI = ReflectionCache.builder() + .basePackage("io.datahubproject.openapi.generated").build(); static { // Build a map from __type name to generated class @@ -117,59 +122,75 @@ public static Map mapServiceResponse(Map mapEnvelopedAspect(entry.getValue(), objectMapper)))); + return EntityResponse.builder() + .entityName(entityResponse.getEntityName()) + .urn(entityResponse.getUrn().toString()) + .aspects(entityResponse.getAspects() + .entrySet() + .stream() + .collect(Collectors.toMap(Map.Entry::getKey, entry -> mapEnvelopedAspect(entry.getValue(), objectMapper)))).build(); } public static EnvelopedAspect mapEnvelopedAspect(com.linkedin.entity.EnvelopedAspect envelopedAspect, - ObjectMapper objectMapper) { - return new EnvelopedAspect() - .name(envelopedAspect.getName()) - .timestamp(envelopedAspect.getTimestamp()) - .version(envelopedAspect.getVersion()) - .type(AspectType.fromValue(envelopedAspect.getType().name().toUpperCase(Locale.ROOT))) - .created(objectMapper.convertValue(envelopedAspect.getCreated().data(), AuditStamp.class)) - .value(mapAspectValue(envelopedAspect.getName(), envelopedAspect.getValue(), objectMapper)); + ObjectMapper objectMapper) { + return EnvelopedAspect.builder() + .name(envelopedAspect.getName()) + .timestamp(envelopedAspect.getTimestamp()) + .version(envelopedAspect.getVersion()) + .type(AspectType.fromValue(envelopedAspect.getType().name().toUpperCase(Locale.ROOT))) + .created(objectMapper.convertValue(envelopedAspect.getCreated().data(), AuditStamp.class)) + .value(mapAspectValue(envelopedAspect.getName(), envelopedAspect.getValue(), objectMapper)).build(); + } + + private static DataMap insertDiscriminator(@Nullable Class parentClazz, DataMap dataMap) { + if (REFLECT_OPENAPI.lookupMethod(parentClazz, "get__type") != null) { + dataMap.put(DISCRIMINATOR, parentClazz.getSimpleName()); + } + + Set> requiresDiscriminator = dataMap.entrySet().stream() + .filter(e -> e.getValue() instanceof DataMap) + .filter(e -> e.getKey().startsWith(PEGASUS_PACKAGE + ".")) + .map(e -> Map.entry(e.getKey(), (DataMap) e.getValue())) + .collect(Collectors.toSet()); + requiresDiscriminator.forEach(e -> { + dataMap.remove(e.getKey()); + dataMap.put(DISCRIMINATOR, e.getKey().substring(e.getKey().lastIndexOf('.') + 1)); + dataMap.putAll(e.getValue()); + }); + + Set> recurse = dataMap.entrySet().stream() + .filter(e -> e.getValue() instanceof DataMap || e.getValue() instanceof DataList) + .flatMap(e -> { + if (e.getValue() instanceof DataList) { + return ((DataList) e.getValue()).stream() + 
.filter(item -> item instanceof DataMap) + .map(item -> Pair.of((String) null, (DataMap) item)); + } else { + return Stream.of(Pair.of(e.getKey(), (DataMap) e.getValue())); + } + }).collect(Collectors.toSet()); + + recurse.forEach(e -> { + if (e.getKey() != null) { + Class getterClazz = null; + if (parentClazz != null) { + Method getMethod = REFLECT_OPENAPI.lookupMethod(parentClazz, "get" + toUpperFirst(e.getKey())); + getterClazz = getMethod.getReturnType(); + } + insertDiscriminator(getterClazz, e.getValue()); + } else { + insertDiscriminator(null, e.getValue()); + } + }); + + return dataMap; } public static OneOfEnvelopedAspectValue mapAspectValue(String aspectName, Aspect aspect, ObjectMapper objectMapper) { Class aspectClass = ENVELOPED_ASPECT_TYPE_MAP.get(aspectName); - DataMap wrapper = aspect.data(); - wrapper.put(DISCRIMINATOR, aspectClass.getSimpleName()); - String dataMapAsJson; + DataMap wrapper = insertDiscriminator(aspectClass, aspect.data()); try { - dataMapAsJson = objectMapper.writeValueAsString(wrapper); - Matcher classNameMatcher = CLASS_NAME_PATTERN.matcher(dataMapAsJson); - while (classNameMatcher.find()) { - String className = classNameMatcher.group("className"); - String content = classNameMatcher.group("content"); - StringBuilder replacement = new StringBuilder("\"" + DISCRIMINATOR + "\" : \"" + className + "\""); - - if (content.length() > 0) { - replacement.append(",") - .append(content); - } - replacement.append("}"); - dataMapAsJson = classNameMatcher.replaceFirst(Matcher.quoteReplacement(replacement.toString())); - classNameMatcher = CLASS_NAME_PATTERN.matcher(dataMapAsJson); - } - // Global Tags & Glossary Terms will not have the explicit class name in the DataMap, so we handle them differently - Matcher globalTagsMatcher = GLOBAL_TAGS_PATTERN.matcher(dataMapAsJson); - while (globalTagsMatcher.find()) { - String replacement = "\"globalTags\" : {\"" + DISCRIMINATOR + "\" : \"GlobalTags\","; - dataMapAsJson = globalTagsMatcher.replaceFirst(Matcher.quoteReplacement(replacement)); - globalTagsMatcher = GLOBAL_TAGS_PATTERN.matcher(dataMapAsJson); - } - Matcher glossaryTermsMatcher = GLOSSARY_TERMS_PATTERN.matcher(dataMapAsJson); - while (glossaryTermsMatcher.find()) { - String replacement = "\"glossaryTerms\" : {\"" + DISCRIMINATOR + "\" : \"GlossaryTerms\","; - dataMapAsJson = glossaryTermsMatcher.replaceFirst(Matcher.quoteReplacement(replacement)); - glossaryTermsMatcher = GLOSSARY_TERMS_PATTERN.matcher(dataMapAsJson); - } + String dataMapAsJson = objectMapper.writeValueAsString(wrapper); return objectMapper.readValue(dataMapAsJson, aspectClass); } catch (JsonProcessingException e) { throw new RuntimeException(e); @@ -206,36 +227,79 @@ private static String getAspectName(Class cls) { return new String(c); } + private static Optional shouldDiscriminate(String parentShortClass, String fieldName, ObjectNode node) { + try { + if (parentShortClass != null) { + Class pegasus2AvroClazz = REFLECT_AVRO.lookupClass(parentShortClass, true); + Method getClassSchema = REFLECT_AVRO.lookupMethod(pegasus2AvroClazz, "getClassSchema"); + Schema avroSchema = (Schema) getClassSchema.invoke(null); + Schema.Field avroField = avroSchema.getField(fieldName); + + if (avroField.schema().isUnion()) { + Class discriminatedClazz = REFLECT_AVRO.lookupClass(node.get(DISCRIMINATOR).asText(), true); + return Optional.of(discriminatedClazz.getName().replace(".pegasus2avro", "")); + } + } + + // check leaf + Iterator itr = node.fieldNames(); + itr.next(); + if (!itr.hasNext()) { // only contains 
discriminator + Class discriminatedClazz = REFLECT_AVRO.lookupClass(node.get(DISCRIMINATOR).asText(), true); + return Optional.of(discriminatedClazz.getName().replace(".pegasus2avro", "")); + } + + } catch (IllegalAccessException | InvocationTargetException e) { + throw new RuntimeException(e); + } + return Optional.empty(); + } + + private static void replaceDiscriminator(ObjectNode node) { + replaceDiscriminator(null, null, null, node); + } + private static void replaceDiscriminator(@Nullable ObjectNode parentNode, @Nullable String parentDiscriminator, + @Nullable String propertyName, @Nonnull ObjectNode node) { + + final String discriminator; + if (node.isObject() && node.has(DISCRIMINATOR)) { + Optional discriminatorClassName = shouldDiscriminate(parentDiscriminator, propertyName, node); + if (parentNode != null && discriminatorClassName.isPresent()) { + discriminator = node.remove(DISCRIMINATOR).asText(); + parentNode.remove(propertyName); + parentNode.set(propertyName, NODE_FACTORY.objectNode().set(discriminatorClassName.get(), node)); + } else { + discriminator = node.remove(DISCRIMINATOR).asText(); + } + } else { + discriminator = null; + } + + List> objectChildren = new LinkedList<>(); + node.fields().forEachRemaining(entry -> { + if (entry.getValue().isObject()) { + objectChildren.add(entry); + } else if (entry.getValue().isArray()) { + entry.getValue().forEach(i -> { + if (i.isObject()) { + objectChildren.add(Map.entry(entry.getKey(), i)); + } + }); + } + }); + + objectChildren.forEach(entry -> + replaceDiscriminator(node, discriminator, entry.getKey(), (ObjectNode) entry.getValue()) + ); + } @Nonnull public static GenericAspect convertGenericAspect(@Nonnull io.datahubproject.openapi.generated.GenericAspect genericAspect, ObjectMapper objectMapper) { try { ObjectNode jsonTree = (ObjectNode) objectMapper.valueToTree(genericAspect).get("value"); - jsonTree.remove(DISCRIMINATOR); + replaceDiscriminator(jsonTree); String pretty = objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(jsonTree); - Matcher classTypeNameMatcher = CLASS_TYPE_NAME_PATTERN.matcher(pretty); - while (classTypeNameMatcher.find()) { - String classTypeName = classTypeNameMatcher.group("classTypeName"); - String content = classTypeNameMatcher.group("content"); - StringBuilder replacement = new StringBuilder(); - // Global Tags & Glossary Terms get used as both a union type and a non-union type, in the DataMap this means - // that it does not want the explicit class name if it is being used explicitly as a non-union type field on an aspect - if (!GLOBAL_TAGS.equals(classTypeName) && !GLOSSARY_TERMS.equals(classTypeName)) { - String pegasusClassName = PEGASUS_TYPE_MAP.get(classTypeName).getName(); - replacement.append("\"").append(pegasusClassName).append("\" : {"); - - if (content.length() > 0) { - replacement.append(content); - } - replacement.append("}}"); - } else { - replacement.append(content) - .append("}"); - } - pretty = classTypeNameMatcher.replaceFirst(Matcher.quoteReplacement(replacement.toString())); - classTypeNameMatcher = CLASS_TYPE_NAME_PATTERN.matcher(pretty); - } return new GenericAspect().setContentType(genericAspect.getContentType()) .setValue(ByteString.copyString(pretty, UTF_8)); } catch (JsonProcessingException e) { @@ -297,16 +361,16 @@ public static Pair ingestProposal(com.linkedin.mxe.MetadataChan } public static MetadataChangeProposal mapToProposal(UpsertAspectRequest aspectRequest) { - MetadataChangeProposal metadataChangeProposal = new MetadataChangeProposal(); + 
MetadataChangeProposal.MetadataChangeProposalBuilder metadataChangeProposal = MetadataChangeProposal.builder();
     io.datahubproject.openapi.generated.GenericAspect
-        genericAspect = new io.datahubproject.openapi.generated.GenericAspect()
+        genericAspect = io.datahubproject.openapi.generated.GenericAspect.builder()
         .value(aspectRequest.getAspect())
-        .contentType(MediaType.APPLICATION_JSON_VALUE);
+        .contentType(MediaType.APPLICATION_JSON_VALUE).build();
     io.datahubproject.openapi.generated.GenericAspect keyAspect = null;
     if (aspectRequest.getEntityKeyAspect() != null) {
-      keyAspect = new io.datahubproject.openapi.generated.GenericAspect()
+      keyAspect = io.datahubproject.openapi.generated.GenericAspect.builder()
           .contentType(MediaType.APPLICATION_JSON_VALUE)
-          .value(aspectRequest.getEntityKeyAspect());
+          .value(aspectRequest.getEntityKeyAspect()).build();
     }
     metadataChangeProposal.aspect(genericAspect)
         .changeType(io.datahubproject.openapi.generated.ChangeType.UPSERT)
@@ -315,7 +379,7 @@ public static MetadataChangeProposal mapToProposal(UpsertAspectRequest aspectReq
         .entityUrn(aspectRequest.getEntityUrn())
         .entityType(aspectRequest.getEntityType());

-    return metadataChangeProposal;
+    return metadataChangeProposal.build();
   }

   public static com.linkedin.mxe.MetadataChangeProposal mapToServiceProposal(MetadataChangeProposal metadataChangeProposal,
@@ -385,7 +449,7 @@ public static UpsertAspectRequest createStatusRemoval(Urn urn, EntityService ent
       throw new IllegalArgumentException("Entity type is not valid for soft deletes: " + urn.getEntityType());
     }
     return UpsertAspectRequest.builder()
-        .aspect(new Status().removed(true))
+        .aspect(Status.builder().removed(true).build())
         .entityUrn(urn.toString())
         .entityType(urn.getEntityType())
         .build();
diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/ReflectionCache.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/ReflectionCache.java
new file mode 100644
index 0000000000000..12f7652aff587
--- /dev/null
+++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/ReflectionCache.java
@@ -0,0 +1,138 @@
+package io.datahubproject.openapi.util;
+
+import com.google.common.reflect.ClassPath;
+import com.linkedin.util.Pair;
+import lombok.Builder;
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+@Slf4j
+@Builder
+public class ReflectionCache {
+  private static final ConcurrentHashMap<String, Method> METHOD_CACHE = new ConcurrentHashMap<>();
+  private static final ConcurrentHashMap<String, Class<?>> CLASS_CACHE = new ConcurrentHashMap<>();
+
+  private final String basePackage;
+  private final Set<String> subPackages;
+  @Builder.Default // appropriate for lombok
+  private final Function<Class<?>, String> getBuilderName = clazz ->
+      String.join("", clazz.getSimpleName(), "$", clazz.getSimpleName(), "Builder");
+
+  public static class ReflectionCacheBuilder {
+    public ReflectionCacheBuilder basePackage(String basePackage) {
+      return basePackage(basePackage, Set.of());
+    }
+
+    public ReflectionCacheBuilder basePackage(String basePackage, Set<String> packageExclusions) {
+      this.basePackage = basePackage;
+      return subPackages(findSubPackages(basePackage, Optional.ofNullable(packageExclusions).orElse(Set.of())));
+    }
+
+    private ReflectionCacheBuilder subPackages(Set<String> subPackages) {
+      this.subPackages = subPackages;
+      return this;
+    }
+
+    private Set<String> findSubPackages(String packageName, Set<String> exclusions) {
+      try {
+        return ClassPath.from(getClass().getClassLoader())
+            .getAllClasses()
+            .stream()
+            .filter(clazz -> exclusions.stream().noneMatch(excl -> clazz.getPackageName().startsWith(excl))
+                && !clazz.getName().contains("$") && clazz.getName().startsWith(packageName))
+            .map(ClassPath.ClassInfo::getPackageName)
+            .collect(Collectors.toSet());
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+  }
+
+  public Method lookupMethod(Class<?> clazz, String method, Class<?>... parameters) {
+    if (clazz == null) {
+      return null;
+    } else {
+      return METHOD_CACHE.computeIfAbsent(
+          String.join("_", clazz.getName(), method),
+          key -> {
+            try {
+              log.debug("Lookup: " + clazz.getName() + " Method: " + method + " Parameters: " + Arrays.toString(parameters));
+              return clazz.getDeclaredMethod(method, parameters);
+            } catch (NoSuchMethodException e) {
+              return null;
+            }
+          }
+      );
+    }
+  }
+
+  public Class<?> lookupClass(String className, boolean searchSubclass) {
+    if (!searchSubclass) {
+      return lookupClass(className);
+    } else {
+      List<String> subclasses = new LinkedList<>();
+      subclasses.add(basePackage);
+      if (subPackages != null) {
+        subclasses.addAll(subPackages);
+      }
+
+      for (String packageName : subclasses) {
+        try {
+          return cachedClassLookup(packageName, className);
+        } catch (Exception e) {
+          log.debug("Class not found {}.{} ... continuing search", packageName, className);
+        }
+      }
+    }
+    throw new ClassCastException(String.format("Could not locate %s in package %s", className, basePackage));
+  }
+
+  public Class<?> lookupClass(String className) {
+    return cachedClassLookup(basePackage, className);
+  }
+
+  private Class<?> cachedClassLookup(String packageName, String className) {
+    return CLASS_CACHE.computeIfAbsent(
+        String.format("%s.%s", packageName, className),
+        key -> {
+          try {
+            log.debug("Lookup: " + key);
+            return Class.forName(key);
+          } catch (ClassNotFoundException e) {
+            throw new RuntimeException(e);
+          }
+        }
+    );
+  }
+
+  /**
+   * Get builder instance and class
+   */
+  public Pair<Class<?>, Object> getBuilder(Class<?> toClazz) throws InvocationTargetException, IllegalAccessException {
+    Class<?> toClazzBuilder = lookupClass(getBuilderName.apply(toClazz));
+    return Pair.of(toClazzBuilder, lookupMethod(toClazz, "builder").invoke(null));
+  }
+
+  public Method lookupMethod(Pair<Class<?>, Object> builderPair, String method, Class<?>...
parameters) { + return lookupMethod(builderPair.getFirst(), method, parameters); + } + + public static String toLowerFirst(String s) { + return s.substring(0, 1).toLowerCase() + s.substring(1); + } + + public static String toUpperFirst(String s) { + return s.substring(0, 1).toUpperCase() + s.substring(1); + } +} diff --git a/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java b/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java index 229e71168557d..6c2ec108fe493 100644 --- a/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java +++ b/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java @@ -90,85 +90,86 @@ public void setup() public void testIngestDataset() { List datasetAspects = new ArrayList<>(); UpsertAspectRequest viewProperties = UpsertAspectRequest.builder() - .aspect(new ViewProperties() + .aspect(ViewProperties.builder() .viewLogic(S) .viewLanguage(S) - .materialized(true)) + .materialized(true).build()) .entityType(DATASET_ENTITY_NAME) .entityUrn(DATASET_URN) .build(); datasetAspects.add(viewProperties); UpsertAspectRequest subTypes = UpsertAspectRequest.builder() - .aspect(new SubTypes() - .typeNames(Collections.singletonList(S))) + .aspect(SubTypes.builder() + .typeNames(Collections.singletonList(S)).build()) .entityType(DATASET_ENTITY_NAME) - .entityKeyAspect(new DatasetKey() + .entityKeyAspect(DatasetKey.builder() .name("name") .platform(DATA_PLATFORM_URN) - .origin(FabricType.PROD)) + .origin(FabricType.PROD).build()) .build(); datasetAspects.add(subTypes); UpsertAspectRequest datasetProfile = UpsertAspectRequest.builder() - .aspect(new DatasetProfile().timestampMillis(0L).addFieldProfilesItem( - new DatasetFieldProfile() - .fieldPath(S) - .histogram(new Histogram() - .boundaries(Collections.singletonList(S)))) - ) - .entityType(DATASET_ENTITY_NAME) - .entityKeyAspect(new DatasetKey() - .name("name") - .platform(DATA_PLATFORM_URN) - .origin(FabricType.PROD)) - .build(); + .aspect(DatasetProfile.builder().build().timestampMillis(0L).addFieldProfilesItem( + DatasetFieldProfile.builder() + .fieldPath(S) + .histogram(Histogram.builder() + .boundaries(Collections.singletonList(S)).build()).build() + ) + ) + .entityType(DATASET_ENTITY_NAME) + .entityKeyAspect(DatasetKey.builder() + .name("name") + .platform(DATA_PLATFORM_URN) + .origin(FabricType.PROD).build()) + .build(); datasetAspects.add(datasetProfile); UpsertAspectRequest schemaMetadata = UpsertAspectRequest.builder() - .aspect(new SchemaMetadata() + .aspect(SchemaMetadata.builder() .schemaName(S) .dataset(DATASET_URN) .platform(DATA_PLATFORM_URN) .hash(S) .version(0L) - .platformSchema(new MySqlDDL().tableSchema(S)) - .fields(Collections.singletonList(new SchemaField() + .platformSchema(MySqlDDL.builder().tableSchema(S).build()) + .fields(Collections.singletonList(SchemaField.builder() .fieldPath(S) .nativeDataType(S) - .type(new SchemaFieldDataType().type(new StringType())) + .type(SchemaFieldDataType.builder().type(StringType.builder().build()).build()) .description(S) - .globalTags(new GlobalTags() - .tags(Collections.singletonList(new TagAssociation() - .tag(TAG_URN)))) - .glossaryTerms(new GlossaryTerms() - .terms(Collections.singletonList(new GlossaryTermAssociation() - .urn(GLOSSARY_TERM_URN))) - .auditStamp(new AuditStamp() + .globalTags(GlobalTags.builder() + .tags(Collections.singletonList(TagAssociation.builder() + .tag(TAG_URN).build())).build()) + .glossaryTerms(GlossaryTerms.builder() + 
.terms(Collections.singletonList(GlossaryTermAssociation.builder() + .urn(GLOSSARY_TERM_URN).build())) + .auditStamp(AuditStamp.builder() .time(0L) - .actor(CORPUSER_URN))) + .actor(CORPUSER_URN).build()).build()).build() ) - )) + ).build()) .entityType(DATASET_ENTITY_NAME) - .entityKeyAspect(new DatasetKey() + .entityKeyAspect(DatasetKey.builder() .name("name") .platform(DATA_PLATFORM_URN) - .origin(FabricType.PROD)) + .origin(FabricType.PROD).build()) .build(); datasetAspects.add(schemaMetadata); UpsertAspectRequest glossaryTerms = UpsertAspectRequest.builder() - .aspect(new GlossaryTerms() - .terms(Collections.singletonList(new GlossaryTermAssociation() - .urn(GLOSSARY_TERM_URN))) - .auditStamp(new AuditStamp() + .aspect(GlossaryTerms.builder() + .terms(Collections.singletonList(GlossaryTermAssociation.builder() + .urn(GLOSSARY_TERM_URN).build())) + .auditStamp(AuditStamp.builder() .time(0L) - .actor(CORPUSER_URN))) + .actor(CORPUSER_URN).build()).build()) .entityType(DATASET_ENTITY_NAME) - .entityKeyAspect(new DatasetKey() + .entityKeyAspect(DatasetKey.builder() .name("name") .platform(DATA_PLATFORM_URN) - .origin(FabricType.PROD)) + .origin(FabricType.PROD).build()) .build(); datasetAspects.add(glossaryTerms); diff --git a/metadata-service/schema-registry-api/build.gradle b/metadata-service/schema-registry-api/build.gradle index 7bf1e558c8906..290126836eb4a 100644 --- a/metadata-service/schema-registry-api/build.gradle +++ b/metadata-service/schema-registry-api/build.gradle @@ -20,7 +20,7 @@ dependencies { // End of dependencies implementation externalDependency.swaggerAnnotations - swaggerCodegen 'io.swagger.codegen.v3:swagger-codegen-cli:3.0.46' + swaggerCodegen externalDependency.swaggerCli testImplementation externalDependency.assertJ } diff --git a/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/config/SpringWebSchemaRegistryConfig.java b/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/config/SpringWebSchemaRegistryConfig.java index c67d6d8f6fe17..d217d501630e3 100644 --- a/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/config/SpringWebSchemaRegistryConfig.java +++ b/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/config/SpringWebSchemaRegistryConfig.java @@ -1,6 +1,7 @@ package io.datahubproject.openapi.schema.registry.config; import io.swagger.v3.oas.annotations.OpenAPIDefinition; +import io.swagger.v3.oas.annotations.info.Info; import io.swagger.v3.oas.annotations.servers.Server; import java.util.List; import org.springframework.context.annotation.Configuration; @@ -14,7 +15,8 @@ @EnableWebMvc -@OpenAPIDefinition(servers = {@Server(url = "/schema-registry/", description = "Schema Registry Server URL")}) +@OpenAPIDefinition(info = @Info(title = "DataHub OpenAPI", version = "1.0.0"), + servers = {@Server(url = "/schema-registry/", description = "Schema Registry Server URL")}) @Configuration public class SpringWebSchemaRegistryConfig implements WebMvcConfigurer { diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java index 86043f4b7cd27..30cfc2e0288bd 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -35,6 
+35,14 @@ public interface EntityService { + /** + * Just whether the entity/aspect exists + * @param urn urn for the entity + * @param aspectName aspect for the entity + * @return exists or not + */ + Boolean exists(Urn urn, String aspectName); + /** * Retrieves the latest aspects corresponding to a batch of {@link Urn}s based on a provided * set of aspect names. @@ -206,7 +214,7 @@ void ingestEntities(@Nonnull final List entities, @Nonnull final AuditSt @Nonnull final List systemMetadata); @Deprecated - void ingestEntity(Entity entity, AuditStamp auditStamp); + SystemMetadata ingestEntity(Entity entity, AuditStamp auditStamp); @Deprecated void ingestEntity(@Nonnull Entity entity, @Nonnull AuditStamp auditStamp, diff --git a/metadata-service/war/build.gradle b/metadata-service/war/build.gradle index 3bd2695c927a7..122c2b9d5357b 100644 --- a/metadata-service/war/build.gradle +++ b/metadata-service/war/build.gradle @@ -19,6 +19,8 @@ dependencies { runtimeOnly project(':metadata-service:graphql-servlet-impl') runtimeOnly project(':metadata-service:health-servlet') runtimeOnly project(':metadata-service:openapi-servlet') + runtimeOnly project(':metadata-service:openapi-entity-servlet') + runtimeOnly project(':metadata-service:openapi-analytics-servlet') runtimeOnly project(':metadata-service:schema-registry-servlet') runtimeOnly project(':metadata-jobs:mce-consumer') runtimeOnly project(':metadata-jobs:mae-consumer') diff --git a/metadata-service/war/src/main/resources/boot/policies.json b/metadata-service/war/src/main/resources/boot/policies.json index 3cda0269b79f1..410596cc30cbe 100644 --- a/metadata-service/war/src/main/resources/boot/policies.json +++ b/metadata-service/war/src/main/resources/boot/policies.json @@ -31,7 +31,8 @@ "GET_ES_TASK_STATUS_PRIVILEGE", "SET_WRITEABLE_PRIVILEGE", "APPLY_RETENTION_PRIVILEGE", - "MANAGE_GLOBAL_OWNERSHIP_TYPES" + "MANAGE_GLOBAL_OWNERSHIP_TYPES", + "GET_ANALYTICS_PRIVILEGE" ], "displayName":"Root User - All Platform Privileges", "description":"Grants full platform privileges to root datahub super user.", @@ -204,7 +205,8 @@ "GET_ES_TASK_STATUS_PRIVILEGE", "SET_WRITEABLE_PRIVILEGE", "APPLY_RETENTION_PRIVILEGE", - "MANAGE_GLOBAL_OWNERSHIP_TYPES" + "MANAGE_GLOBAL_OWNERSHIP_TYPES", + "GET_ANALYTICS_PRIVILEGE" ], "displayName":"Admins - Platform Policy", "description":"Admins have all platform privileges.", diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java index 0b0d462f079bf..df960808d8a41 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java @@ -47,6 +47,11 @@ public class PoliciesConfig { "View Analytics", "View the DataHub analytics dashboard."); + public static final Privilege GET_ANALYTICS_PRIVILEGE = Privilege.of( + "GET_ANALYTICS_PRIVILEGE", + "Analytics API access", + "API read access to raw analytics data."); + public static final Privilege GENERATE_PERSONAL_ACCESS_TOKENS_PRIVILEGE = Privilege.of( "GENERATE_PERSONAL_ACCESS_TOKENS", "Generate Personal Access Tokens", @@ -117,6 +122,7 @@ public class PoliciesConfig { MANAGE_POLICIES_PRIVILEGE, MANAGE_USERS_AND_GROUPS_PRIVILEGE, VIEW_ANALYTICS_PRIVILEGE, + GET_ANALYTICS_PRIVILEGE, MANAGE_DOMAINS_PRIVILEGE, MANAGE_GLOBAL_ANNOUNCEMENTS_PRIVILEGE, MANAGE_INGESTION_PRIVILEGE, diff --git a/settings.gradle b/settings.gradle index 
270672e929e88..d6777b07b3fb3 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -14,6 +14,9 @@ include 'metadata-service:restli-client'
 include 'metadata-service:restli-servlet-impl'
 include 'metadata-service:graphql-servlet-impl'
 include 'metadata-service:openapi-servlet'
+include 'metadata-service:openapi-entity-servlet'
+include 'metadata-service:openapi-entity-servlet:generators'
+include 'metadata-service:openapi-analytics-servlet'
 include 'metadata-service:plugin'
 include 'metadata-service:plugin:src:test:sample-test-plugins'
 include 'metadata-dao-impl:kafka-producer'

From 60f1fa9c34e12667a551131ed6a432d7ca4db6b6 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Thu, 7 Sep 2023 17:24:57 -0500
Subject: [PATCH 15/20] docs(db-retention): update with default setting (#8797)

---
 docs/advanced/db-retention.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/advanced/db-retention.md b/docs/advanced/db-retention.md
index 91c7e2f0d3adc..f154609a5ab2f 100644
--- a/docs/advanced/db-retention.md
+++ b/docs/advanced/db-retention.md
@@ -38,8 +38,8 @@ We are planning to support a cron-based application of retention in the near fut
 ## How to configure?
 
-For the initial iteration, we have made this feature opt-in. Please set **ENTITY_SERVICE_ENABLE_RETENTION=true** when
-creating the datahub-gms container/k8s pod.
+We have enabled this feature by default. Please set **ENTITY_SERVICE_ENABLE_RETENTION=false** when
+creating the datahub-gms container/k8s pod to prevent the retention policies from taking effect.
 
 On GMS start up, retention policies are initialized with:
 1. First, the default provided **version-based** retention to keep **20 latest aspects** for all entity-aspect pairs.
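Since this patch flips retention from opt-in to opt-out, deployments that relied on the previous default need to disable it explicitly. A minimal opt-out sketch, assuming a docker-compose style deployment with a datahub-gms service (the override file and service name here are illustrative, not part of this patch):

    # docker-compose.override.yml (hypothetical): restore the previous opt-in behavior
    services:
      datahub-gms:
        environment:
          - ENTITY_SERVICE_ENABLE_RETENTION=false

On Kubernetes the same variable goes into the datahub-gms pod's environment; left unset, GMS initializes the default version-based policy that keeps the 20 latest aspects for every entity-aspect pair.
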
From 519b36d7b5aec0c63ae76907fdef05274487bc86 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 7 Sep 2023 18:06:21 -0500 Subject: [PATCH 16/20] fix(custom-search): fix custom search to be able to use unquoted query (#8805) --- .../search/elasticsearch/query/request/SearchQueryBuilder.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java index 4eebf02d70e9e..b01c736ec23ae 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java @@ -437,7 +437,8 @@ public Optional boolQueryBuilder(QueryConfiguration customQuer private BoolQueryBuilder toBoolQueryBuilder(String query, BoolQueryConfiguration boolQuery) { try { String jsonFragment = OBJECT_MAPPER.writeValueAsString(boolQuery) - .replace("\"{{query_string}}\"", OBJECT_MAPPER.writeValueAsString(query)); + .replace("\"{{query_string}}\"", OBJECT_MAPPER.writeValueAsString(query)) + .replace("\"{{unquoted_query_string}}\"", OBJECT_MAPPER.writeValueAsString(unquote(query))); XContentParser parser = XContentType.JSON.xContent().createParser(X_CONTENT_REGISTRY, LoggingDeprecationHandler.INSTANCE, jsonFragment); return BoolQueryBuilder.fromXContent(parser); From 90bceb77a4177ed22d2f56a269029313f4fa8740 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Fri, 8 Sep 2023 09:16:51 +0900 Subject: [PATCH 17/20] feat: add feedback widget (#8732) Co-authored-by: Jeff Merrick --- docs-website/package.json | 4 +- docs-website/src/components/Feedback/index.js | 102 +++++++++++ .../components/Feedback/styles.module.scss | 78 +++++++++ .../src/components/Feedback/supabase.js | 9 + .../src/theme/DocItem/Footer/index.js | 58 +++++++ .../theme/DocItem/Footer/styles.module.css | 11 ++ docs-website/yarn.lock | 163 +++++++++++++++++- 7 files changed, 423 insertions(+), 2 deletions(-) create mode 100644 docs-website/src/components/Feedback/index.js create mode 100644 docs-website/src/components/Feedback/styles.module.scss create mode 100644 docs-website/src/components/Feedback/supabase.js create mode 100644 docs-website/src/theme/DocItem/Footer/index.js create mode 100644 docs-website/src/theme/DocItem/Footer/styles.module.css diff --git a/docs-website/package.json b/docs-website/package.json index 6a0f038d19f33..400ef4143c786 100644 --- a/docs-website/package.json +++ b/docs-website/package.json @@ -31,6 +31,7 @@ "@octokit/plugin-throttling": "^3.5.1", "@octokit/rest": "^18.6.2", "@radix-ui/react-visually-hidden": "^1.0.2", + "@supabase/supabase-js": "^2.33.1", "antd": "^5.0.7", "clsx": "^1.1.1", "docusaurus-graphql-plugin": "0.5.0", @@ -39,7 +40,8 @@ "markprompt": "^0.1.7", "react": "^18.2.0", "react-dom": "18.2.0", - "sass": "^1.43.2" + "sass": "^1.43.2", + "uuid": "^9.0.0" }, "browserslist": { "production": [ diff --git a/docs-website/src/components/Feedback/index.js b/docs-website/src/components/Feedback/index.js new file mode 100644 index 0000000000000..ecabca445bd48 --- /dev/null +++ b/docs-website/src/components/Feedback/index.js @@ -0,0 +1,102 @@ +import React, { useState, useMemo } from "react"; +import clsx from "clsx"; +import { supabase } from "./supabase"; +import styles from 
"./styles.module.scss"; +import { LikeOutlined, DislikeOutlined, CheckCircleOutlined } from "@ant-design/icons"; +import { v4 as uuidv4 } from "uuid"; + +const Feedback = () => { + const [reaction, setReaction] = useState(null); + const [feedback, setFeedback] = useState(""); + const [submitted, setSubmitted] = useState(false); + const [reactionId, setReactionId] = useState(null); + + const handleReaction = async (selectedReaction) => { + if (reaction !== selectedReaction) { + const uuid = uuidv4(); + try { + const { error } = await supabase.from("reaction_feedback").insert([ + { + id: uuid, + page: window.location.href, + reaction: selectedReaction, + }, + ]); + + if (error) { + console.error("Error submitting feedback:", error); + return; + } + setReactionId(uuid); + setReaction(selectedReaction); + } catch (error) { + console.error("Error submitting feedback:", error); + } + } else { + setReaction(null); + } + }; + + const handleSubmit = async (e) => { + e.preventDefault(); + try { + if (feedback !== "" && reactionId !== null) { + const { error } = await supabase.from("written_feedback").insert([ + { + feedback: feedback, + reaction_feedback_id: reactionId, + }, + ]); + + if (error) { + console.error("Error submitting feedback:", error); + return; + } + setSubmitted(true); + } + } catch (error) { + console.error("Error submitting feedback:", error); + } + }; + + return ( +
+    <div className={styles.feedbackWrapper}>
+      <div className={styles.feedbackWidget}>
+        {!submitted ? (
+          <>
+            <div className={styles.feedbackButtons}>
+              <strong>Is this page helpful?</strong>
+              <div
+                className={clsx(styles.feedbackButton, reaction === "like" && styles.active)}
+                onClick={() => handleReaction("like")}
+              >
+                <LikeOutlined />
+              </div>
+              <div
+                className={clsx(styles.feedbackButton, reaction === "dislike" && styles.active)}
+                onClick={() => handleReaction("dislike")}
+              >
+                <DislikeOutlined />
+              </div>
+            </div>
+            {reaction !== null && (
+              <form className={styles.feedbackForm} onSubmit={handleSubmit}>
+                <textarea
+                  className={styles.feedbackText}
+                  value={feedback}
+                  onChange={(e) => setFeedback(e.target.value)}
+                  placeholder="Your feedback..."
+                />
+                <button type="submit">Send</button>
+              </form>
+            )}
+          </>
+        ) : (
+          <div className={styles.feedbackMessage}>
+            <CheckCircleOutlined />
+            <strong>Thanks for your feedback!</strong>
+          </div>
+        )}
+      </div>
+    </div>
+ ); +}; + +export default Feedback; diff --git a/docs-website/src/components/Feedback/styles.module.scss b/docs-website/src/components/Feedback/styles.module.scss new file mode 100644 index 0000000000000..b0fa3d7d1bd2b --- /dev/null +++ b/docs-website/src/components/Feedback/styles.module.scss @@ -0,0 +1,78 @@ +.feedbackWrapper { + display: flex; +} + +.feedbackWidget { + min-height: 64px; + margin: 15px auto; + padding: 10px 20px; + border: var(--ifm-hr-border-color) 1px solid; + border-radius: 32px; + text-align: center; + display: flex; + justify-content: center; + flex-direction: column; +} + +.feedbackButtons { + strong { + margin-right: 4px; + } + + display: flex; + justify-content: center; + align-items: center; + gap: 1rem; +} + +.feedbackForm { + margin: 1rem 0; + display: flex; + flex-direction: column; + width: 100%; + gap: 0.8rem; + button { + margin-left: auto; + } +} + +.feedbackText { + width: 100%; + border: var(--ifm-hr-border-color) 1px solid; + border-radius: 0.4rem; + padding: 0.4rem; + font-family: "Manrope", sans-serif; +} + +.feedbackButton { + width: 2rem; + height: 2rem; + text-align: center; + font-size: 1.25rem; + padding: 0.25rem; + border-radius: 1000em; + margin-left: 1rem; + cursor: pointer; + transition: all 0.2s ease-in-out; + svg { + fill: var(--ifm-link-color); + } + + &:hover, + &.active { + background: var(--ifm-link-color); + svg { + fill: var(--ifm-background-color); + } + } +} + +.feedbackMessage { + display: flex; + align-items: center; + svg { + font-size: larger; + margin-right: 6px; + fill: var(--ifm-color-success); + } +} diff --git a/docs-website/src/components/Feedback/supabase.js b/docs-website/src/components/Feedback/supabase.js new file mode 100644 index 0000000000000..2f3b8e4306565 --- /dev/null +++ b/docs-website/src/components/Feedback/supabase.js @@ -0,0 +1,9 @@ +import { createClient } from "@supabase/supabase-js"; + +const supabaseUrl = "https://ttydafdojardufehywni.supabase.co"; +const supabaseKey = + "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InR0eWRhZmRvamFyZHVmZWh5d25pIiwicm9sZSI6ImFub24iLCJpYXQiOjE2OTMzNDk2NDksImV4cCI6MjAwODkyNTY0OX0.X2KKTPFzouQyXAQH3VTrL-fyhbdUtlPsLHIYtoACQss"; + +export const supabase = createClient(supabaseUrl, supabaseKey); + +export default supabase; diff --git a/docs-website/src/theme/DocItem/Footer/index.js b/docs-website/src/theme/DocItem/Footer/index.js new file mode 100644 index 0000000000000..94b6c7734f9ee --- /dev/null +++ b/docs-website/src/theme/DocItem/Footer/index.js @@ -0,0 +1,58 @@ +import React from "react"; +import clsx from "clsx"; +import { ThemeClassNames } from "@docusaurus/theme-common"; +import { useDoc } from "@docusaurus/theme-common/internal"; +import LastUpdated from "@theme/LastUpdated"; +import EditThisPage from "@theme/EditThisPage"; +import TagsListInline from "@theme/TagsListInline"; +import styles from "./styles.module.css"; +import Feedback from "../../../components/Feedback"; + +function TagsRow(props) { + return ( +
+    <div className={clsx(ThemeClassNames.docs.docFooterTagsRow, "row margin-bottom--sm")}>
+      <div className="col">
+        <TagsListInline {...props} />
+      </div>
+    </div>
+  );
+}
+
+function EditMetaRow({ editUrl, lastUpdatedAt, lastUpdatedBy, formattedLastUpdatedAt }) {
+  return (
+    <div className={clsx(ThemeClassNames.docs.docFooterEditMetaRow, "row")}>
+      <div className="col">{editUrl && <EditThisPage editUrl={editUrl} />}</div>
+      <div className={clsx("col", styles.lastUpdated)}>
+        {(lastUpdatedAt || lastUpdatedBy) && (
+          <LastUpdated
+            lastUpdatedAt={lastUpdatedAt}
+            formattedLastUpdatedAt={formattedLastUpdatedAt}
+            lastUpdatedBy={lastUpdatedBy}
+          />
+        )}
+      </div>
+    </div>
+  );
+}
+
+export default function DocItemFooter() {
+  const { metadata } = useDoc();
+  const { editUrl, lastUpdatedAt, formattedLastUpdatedAt, lastUpdatedBy, tags, unversionedId } = metadata;
+  const canDisplayTagsRow = tags.length > 0;
+  const canDisplayEditMetaRow = !!(editUrl || lastUpdatedAt || lastUpdatedBy);
+  const canDisplayFooter = canDisplayTagsRow || canDisplayEditMetaRow;
+  if (!canDisplayFooter) {
+    return null;
+  }
+  return (
+    <>
+      <footer className={clsx(ThemeClassNames.docs.docFooter, "docusaurus-mt-lg")}>
+        {canDisplayTagsRow && <TagsRow tags={tags} />}
+        {canDisplayEditMetaRow && (
+          <EditMetaRow
+            editUrl={editUrl}
+            lastUpdatedAt={lastUpdatedAt}
+            lastUpdatedBy={lastUpdatedBy}
+            formattedLastUpdatedAt={formattedLastUpdatedAt}
+          />
+        )}
+      </footer>
+      <Feedback />
+    </>
+ + + ); +} diff --git a/docs-website/src/theme/DocItem/Footer/styles.module.css b/docs-website/src/theme/DocItem/Footer/styles.module.css new file mode 100644 index 0000000000000..7c1e964419179 --- /dev/null +++ b/docs-website/src/theme/DocItem/Footer/styles.module.css @@ -0,0 +1,11 @@ +.lastUpdated { + margin-top: 0.2rem; + font-style: italic; + font-size: smaller; +} + +@media (min-width: 997px) { + .lastUpdated { + text-align: right; + } +} diff --git a/docs-website/yarn.lock b/docs-website/yarn.lock index a93ae9e2bc9c3..209a57a43dab0 100644 --- a/docs-website/yarn.lock +++ b/docs-website/yarn.lock @@ -2499,6 +2499,62 @@ p-map "^4.0.0" webpack-sources "^3.2.2" +"@supabase/functions-js@^2.1.0": + version "2.1.4" + resolved "https://registry.yarnpkg.com/@supabase/functions-js/-/functions-js-2.1.4.tgz#57da24829ffe8f15c002dfcc615ef4ab5735156d" + integrity sha512-5EEhei1hFCMBX4Pig4kGKjJ59DZvXwilcIBYYp4wyK/iHdAN6Vw9di9VN6/oRXRVS/6jgZd0jdmI+QgGGSxZsA== + dependencies: + cross-fetch "^3.1.5" + +"@supabase/gotrue-js@^2.46.1": + version "2.48.1" + resolved "https://registry.yarnpkg.com/@supabase/gotrue-js/-/gotrue-js-2.48.1.tgz#f7f0e8c27e22215a59e99f8ac07254742bdafcc6" + integrity sha512-jTcqA/iz2crq2b3PbzXZrW3QIoKJ7SfwPWTGJNv//1nDi0i2vCx1mO+BjXwPx+TlOSG8okK0QB2eP8uaF83OvA== + dependencies: + cross-fetch "^3.1.5" + +"@supabase/node-fetch@^2.6.14": + version "2.6.14" + resolved "https://registry.yarnpkg.com/@supabase/node-fetch/-/node-fetch-2.6.14.tgz#6a3e2924e3de8aeeb82c193c786ffb25da9af23f" + integrity sha512-w/Tsd22e/5fAeoxqQ4P2MX6EyF+iM6rc9kmlMVFkHuG0rAltt2TLhFbDJfemnHbtvnazWaRfy5KnFU/SYT37dQ== + dependencies: + whatwg-url "^5.0.0" + +"@supabase/postgrest-js@^1.8.0": + version "1.8.4" + resolved "https://registry.yarnpkg.com/@supabase/postgrest-js/-/postgrest-js-1.8.4.tgz#89e8355503979ad25e7340b910d17704507ab325" + integrity sha512-ELjpvhb04wILUiJz9zIsTSwaz9LQNlX+Ig5/LgXQ7k68qQI6NqHVn+ISRNt53DngUIyOnLHjeqqIRHBZ7zpgGA== + dependencies: + "@supabase/node-fetch" "^2.6.14" + +"@supabase/realtime-js@^2.7.4": + version "2.7.4" + resolved "https://registry.yarnpkg.com/@supabase/realtime-js/-/realtime-js-2.7.4.tgz#de41195bd3f2cdd6db82d9f93c4c5b8fae9f809b" + integrity sha512-FzSzs1k9ruh/uds5AJ95Nc3beiMCCIhougExJ3O98CX1LMLAKUKFy5FivKLvcNhXnNfUEL0XUfGMb4UH2J7alg== + dependencies: + "@types/phoenix" "^1.5.4" + "@types/websocket" "^1.0.3" + websocket "^1.0.34" + +"@supabase/storage-js@^2.5.1": + version "2.5.3" + resolved "https://registry.yarnpkg.com/@supabase/storage-js/-/storage-js-2.5.3.tgz#6d6023d0420151a4c65339e762eca7838fd0f97c" + integrity sha512-wyCkBFMTiehvyLUvvvSszvhPkhaHKHcPx//fYN8NoKEa1TQwC2HuO5EIaJ5EagtAVmI1N3EFQ+M4RER6mnTaNg== + dependencies: + cross-fetch "^3.1.5" + +"@supabase/supabase-js@^2.33.1": + version "2.33.1" + resolved "https://registry.yarnpkg.com/@supabase/supabase-js/-/supabase-js-2.33.1.tgz#2407861afe63c2817d030514c87a745f78dfe68a" + integrity sha512-jA00rquPTppPOHpBB6KABW98lfg0gYXcuGqP3TB1iiduznRVsi3GGk2qBKXPDLMYSe0kRlQp5xCwWWthaJr8eA== + dependencies: + "@supabase/functions-js" "^2.1.0" + "@supabase/gotrue-js" "^2.46.1" + "@supabase/postgrest-js" "^1.8.0" + "@supabase/realtime-js" "^2.7.4" + "@supabase/storage-js" "^2.5.1" + cross-fetch "^3.1.5" + "@svgr/babel-plugin-add-jsx-attribute@^6.5.1": version "6.5.1" resolved "https://registry.yarnpkg.com/@svgr/babel-plugin-add-jsx-attribute/-/babel-plugin-add-jsx-attribute-6.5.1.tgz#74a5d648bd0347bda99d82409d87b8ca80b9a1ba" @@ -2815,6 +2871,11 @@ resolved 
"https://registry.yarnpkg.com/@types/parse5/-/parse5-5.0.3.tgz#e7b5aebbac150f8b5fdd4a46e7f0bd8e65e19109" integrity sha512-kUNnecmtkunAoQ3CnjmMkzNU/gtxG8guhi+Fk2U/kOpIKjIMKnXGp4IJCgQJrXSgMsWYimYG4TGjz/UzbGEBTw== +"@types/phoenix@^1.5.4": + version "1.6.0" + resolved "https://registry.yarnpkg.com/@types/phoenix/-/phoenix-1.6.0.tgz#eb7536259ee695646e75c4c7b0c9a857ea174781" + integrity sha512-qwfpsHmFuhAS/dVd4uBIraMxRd56vwBUYQGZ6GpXnFuM2XMRFJbIyruFKKlW2daQliuYZwe0qfn/UjFCDKic5g== + "@types/prop-types@*", "@types/prop-types@^15.0.0": version "15.7.5" resolved "https://registry.yarnpkg.com/@types/prop-types/-/prop-types-15.7.5.tgz#5f19d2b85a98e9558036f6a3cacc8819420f05cf" @@ -3594,6 +3655,13 @@ buffer@^5.5.0, buffer@^5.7.0: base64-js "^1.3.1" ieee754 "^1.1.13" +bufferutil@^4.0.1: + version "4.0.7" + resolved "https://registry.yarnpkg.com/bufferutil/-/bufferutil-4.0.7.tgz#60c0d19ba2c992dd8273d3f73772ffc894c153ad" + integrity sha512-kukuqc39WOHtdxtw4UScxF/WVnMFVSQVKhtx3AjZJzhd0RGZZldcrfSEbVsWWe6KNH253574cq5F+wpv0G9pJw== + dependencies: + node-gyp-build "^4.3.0" + bytes@3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/bytes/-/bytes-3.0.0.tgz#d32815404d689699f85a4ea4fa8755dd13a96048" @@ -4278,6 +4346,14 @@ csstype@^3.0.10, csstype@^3.0.2: resolved "https://registry.yarnpkg.com/csstype/-/csstype-3.1.2.tgz#1d4bf9d572f11c14031f0436e1c10bc1f571f50b" integrity sha512-I7K1Uu0MBPzaFKg4nI5Q7Vs2t+3gWWW648spaF+Rg7pI9ds18Ugn+lvg4SHczUdKlHI5LWBXyqfS8+DufyBsgQ== +d@1, d@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/d/-/d-1.0.1.tgz#8698095372d58dbee346ffd0c7093f99f8f9eb5a" + integrity sha512-m62ShEObQ39CfralilEQRjH6oAMtNCV1xJyEx5LpRYUVN+EviphDgUc/F3hnYbADmkiNs67Y+3ylmlG7Lnu+FA== + dependencies: + es5-ext "^0.10.50" + type "^1.0.1" + dataloader@2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/dataloader/-/dataloader-2.0.0.tgz#41eaf123db115987e21ca93c005cd7753c55fe6f" @@ -4288,7 +4364,7 @@ dayjs@^1.11.1: resolved "https://registry.yarnpkg.com/dayjs/-/dayjs-1.11.9.tgz#9ca491933fadd0a60a2c19f6c237c03517d71d1a" integrity sha512-QvzAURSbQ0pKdIye2txOzNaHmxtUBXerpY0FJsFXUMKbIZeFm5ht1LS/jFsrncjnmtv8HsG0W2g6c0zUjZWmpA== -debug@2.6.9, debug@^2.6.0: +debug@2.6.9, debug@^2.2.0, debug@^2.6.0: version "2.6.9" resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f" integrity sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA== @@ -4666,6 +4742,32 @@ es-module-lexer@^1.2.1: resolved "https://registry.yarnpkg.com/es-module-lexer/-/es-module-lexer-1.3.0.tgz#6be9c9e0b4543a60cd166ff6f8b4e9dae0b0c16f" integrity sha512-vZK7T0N2CBmBOixhmjdqx2gWVbFZ4DXZ/NyRMZVlJXPa7CyFS+/a4QQsDGDQy9ZfEzxFuNEsMLeQJnKP2p5/JA== +es5-ext@^0.10.35, es5-ext@^0.10.50: + version "0.10.62" + resolved "https://registry.yarnpkg.com/es5-ext/-/es5-ext-0.10.62.tgz#5e6adc19a6da524bf3d1e02bbc8960e5eb49a9a5" + integrity sha512-BHLqn0klhEpnOKSrzn/Xsz2UIW8j+cGmo9JLzr8BiUapV8hPL9+FliFqjwr9ngW7jWdnxv6eO+/LqyhJVqgrjA== + dependencies: + es6-iterator "^2.0.3" + es6-symbol "^3.1.3" + next-tick "^1.1.0" + +es6-iterator@^2.0.3: + version "2.0.3" + resolved "https://registry.yarnpkg.com/es6-iterator/-/es6-iterator-2.0.3.tgz#a7de889141a05a94b0854403b2d0a0fbfa98f3b7" + integrity sha512-zw4SRzoUkd+cl+ZoE15A9o1oQd920Bb0iOJMQkQhl3jNc03YqVjAhG7scf9C5KWRU/R13Orf588uCC6525o02g== + dependencies: + d "1" + es5-ext "^0.10.35" + es6-symbol "^3.1.1" + +es6-symbol@^3.1.1, es6-symbol@^3.1.3: + version "3.1.3" + resolved 
"https://registry.yarnpkg.com/es6-symbol/-/es6-symbol-3.1.3.tgz#bad5d3c1bcdac28269f4cb331e431c78ac705d18" + integrity sha512-NJ6Yn3FuDinBaBRWl/q5X/s4koRHBrgKAu+yGI6JCBeiu3qrcbJhwT2GeR/EXVfylRk8dpQVJoLEFhK+Mu31NA== + dependencies: + d "^1.0.1" + ext "^1.1.2" + escalade@^3.1.1: version "3.1.1" resolved "https://registry.yarnpkg.com/escalade/-/escalade-3.1.1.tgz#d8cfdc7000965c5a0174b4a82eaa5c0552742e40" @@ -4826,6 +4928,13 @@ express@^4.17.3: utils-merge "1.0.1" vary "~1.1.2" +ext@^1.1.2: + version "1.7.0" + resolved "https://registry.yarnpkg.com/ext/-/ext-1.7.0.tgz#0ea4383c0103d60e70be99e9a7f11027a33c4f5f" + integrity sha512-6hxeJYaL110a9b5TEJSj0gojyHQAmA2ch5Os+ySCiA1QGdS697XWY1pzsrSjqA9LDEEgdB/KypIlR59RcLuHYw== + dependencies: + type "^2.7.2" + extend-shallow@^2.0.1: version "2.0.1" resolved "https://registry.yarnpkg.com/extend-shallow/-/extend-shallow-2.0.1.tgz#51af7d614ad9a9f610ea1bafbb989d6b1c56890f" @@ -6895,6 +7004,11 @@ neo-async@^2.6.2: resolved "https://registry.yarnpkg.com/neo-async/-/neo-async-2.6.2.tgz#b4aafb93e3aeb2d8174ca53cf163ab7d7308305f" integrity sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw== +next-tick@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/next-tick/-/next-tick-1.1.0.tgz#1836ee30ad56d67ef281b22bd199f709449b35eb" + integrity sha512-CXdUiJembsNjuToQvxayPZF9Vqht7hewsvy2sOWafLvi2awflj9mOC6bHIg50orX8IJvWKY9wYQ/zB2kogPslQ== + no-case@^3.0.4: version "3.0.4" resolved "https://registry.yarnpkg.com/no-case/-/no-case-3.0.4.tgz#d361fd5c9800f558551a8369fc0dcd4662b6124d" @@ -6939,6 +7053,12 @@ node-forge@^1: resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-1.3.1.tgz#be8da2af243b2417d5f646a770663a92b7e9ded3" integrity sha512-dPEtOeMvF9VMcYV/1Wb8CPoVAXtp6MKMlcbAt4ddqmGqUJ6fQZFXkNZNkNlfevtNkGtaSoXf/vNNNSvgrdXwtA== + +node-gyp-build@^4.3.0: + version "4.6.1" + resolved "https://registry.yarnpkg.com/node-gyp-build/-/node-gyp-build-4.6.1.tgz#24b6d075e5e391b8d5539d98c7fc5c210cac8a3e" + integrity sha512-24vnklJmyRS8ViBNI8KbtK/r/DmXQMRiOMXTNz2nrTnAYUwjmEEbnnpB/+kt+yWRv73bPsSPRFddrcIbAxSiMQ== + node-releases@^2.0.13: version "2.0.13" resolved "https://registry.yarnpkg.com/node-releases/-/node-releases-2.0.13.tgz#d5ed1627c23e3461e819b02e57b75e4899b1c81d" @@ -9470,6 +9590,16 @@ type-is@~1.6.18: media-typer "0.3.0" mime-types "~2.1.24" +type@^1.0.1: + version "1.2.0" + resolved "https://registry.yarnpkg.com/type/-/type-1.2.0.tgz#848dd7698dafa3e54a6c479e759c4bc3f18847a0" + integrity sha512-+5nt5AAniqsCnu2cEQQdpzCAh33kVx8n0VoFidKpB1dVVLAN/F+bgVOqOJqOnEnrhp222clB5p3vUlD+1QAnfg== + +type@^2.7.2: + version "2.7.2" + resolved "https://registry.yarnpkg.com/type/-/type-2.7.2.tgz#2376a15a3a28b1efa0f5350dcf72d24df6ef98d0" + integrity sha512-dzlvlNlt6AXU7EBSfpAscydQ7gXB+pPGsPnfJnZpiNJBDj7IaJzQlBZYGdEi4R9HmPdBv2XmWJ6YUtoTa7lmCw== + typedarray-to-buffer@^3.1.5: version "3.1.5" resolved "https://registry.yarnpkg.com/typedarray-to-buffer/-/typedarray-to-buffer-3.1.5.tgz#a97ee7a9ff42691b9f783ff1bc5112fe3fca9080" @@ -9773,11 +9903,20 @@ use-sidecar@^1.1.2: detect-node-es "^1.1.0" tslib "^2.0.0" + +utf-8-validate@^5.0.2: + version "5.0.10" + resolved "https://registry.yarnpkg.com/utf-8-validate/-/utf-8-validate-5.0.10.tgz#d7d10ea39318171ca982718b6b96a8d2442571a2" + integrity sha512-Z6czzLq4u8fPOyx7TU6X3dvUZVvoJmxSQ+IcrlmagKhilxlhZgxPK6C5Jqbkw1IDUmFTM+cz9QDnnLTwDz/2gQ== + dependencies: + node-gyp-build "^4.3.0" + use-sync-external-store@^1.2.0: version "1.2.0" resolved 
"https://registry.yarnpkg.com/use-sync-external-store/-/use-sync-external-store-1.2.0.tgz#7dbefd6ef3fe4e767a0cf5d7287aacfb5846928a" integrity sha512-eEgnFxGQ1Ife9bzYs6VLi8/4X6CObHMw9Qr9tPY43iKwsPw8xE8+EFsf/2cFZ5S3esXgpWgtSCtLNS41F+sKPA== + util-deprecate@^1.0.1, util-deprecate@^1.0.2, util-deprecate@~1.0.1: version "1.0.2" resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf" @@ -9803,6 +9942,11 @@ uuid@^8.3.2: resolved "https://registry.yarnpkg.com/uuid/-/uuid-8.3.2.tgz#80d5b5ced271bb9af6c445f21a1a04c606cefbe2" integrity sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg== +uuid@^9.0.0: + version "9.0.0" + resolved "https://registry.yarnpkg.com/uuid/-/uuid-9.0.0.tgz#592f550650024a38ceb0c562f2f6aa435761efb5" + integrity sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg== + uvu@^0.5.0: version "0.5.6" resolved "https://registry.yarnpkg.com/uvu/-/uvu-0.5.6.tgz#2754ca20bcb0bb59b64e9985e84d2e81058502df" @@ -10057,6 +10201,18 @@ websocket-extensions@>=0.1.1: resolved "https://registry.yarnpkg.com/websocket-extensions/-/websocket-extensions-0.1.4.tgz#7f8473bc839dfd87608adb95d7eb075211578a42" integrity sha512-OqedPIGOfsDlo31UNwYbCFMSaO9m9G/0faIHj5/dZFDMFqPTcx6UwqyOy3COEaEOg/9VsGIpdqn62W5KhoKSpg== +websocket@^1.0.34: + version "1.0.34" + resolved "https://registry.yarnpkg.com/websocket/-/websocket-1.0.34.tgz#2bdc2602c08bf2c82253b730655c0ef7dcab3111" + integrity sha512-PRDso2sGwF6kM75QykIesBijKSVceR6jL2G8NGYyq2XrItNC2P5/qL5XeR056GhA+Ly7JMFvJb9I312mJfmqnQ== + dependencies: + bufferutil "^4.0.1" + debug "^2.2.0" + es5-ext "^0.10.50" + typedarray-to-buffer "^3.1.5" + utf-8-validate "^5.0.2" + yaeti "^0.0.6" + whatwg-url@^5.0.0: version "5.0.0" resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-5.0.0.tgz#966454e8765462e37644d3626f6742ce8b70965d" @@ -10163,6 +10319,11 @@ xtend@^4.0.0, xtend@^4.0.1: resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.2.tgz#bb72779f5fa465186b1f438f674fa347fdb5db54" integrity sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ== +yaeti@^0.0.6: + version "0.0.6" + resolved "https://registry.yarnpkg.com/yaeti/-/yaeti-0.0.6.tgz#f26f484d72684cf42bedfb76970aa1608fbf9577" + integrity sha512-MvQa//+KcZCUkBTIC9blM+CU9J2GzuTytsOUwf2lidtvkx/6gnEp1QvJv34t9vdjhFmha/mUiNDbN0D0mJWdug== + yallist@^3.0.2: version "3.1.1" resolved "https://registry.yarnpkg.com/yallist/-/yallist-3.1.1.tgz#dbb7daf9bfd8bac9ab45ebf602b8cbad0d5d08fd" From 794eb0324a38e5dbb08049f1917bd87de671407e Mon Sep 17 00:00:00 2001 From: skrydal Date: Fri, 8 Sep 2023 03:08:41 +0200 Subject: [PATCH 18/20] fix(gms): Fixed Recently Viewed section for users with '@' in the URN. 
(#8754) Co-authored-by: pi-skrydalewicz-ext Co-authored-by: amanda-her <912amandahernando@gmail.com> --- .../recommendation/candidatesource/RecentlyViewedSource.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java index 937d80aa5be09..6ef207dada497 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java @@ -122,7 +122,7 @@ private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) { BoolQueryBuilder query = QueryBuilders.boolQuery(); // Filter for the entity view events of the user requesting recommendation query.must( - QueryBuilders.termQuery(ESUtils.toKeywordField(DataHubUsageEventConstants.ACTOR_URN, true), userUrn.toString())); + QueryBuilders.termQuery(ESUtils.toKeywordField(DataHubUsageEventConstants.ACTOR_URN, false), userUrn.toString())); query.must( QueryBuilders.termQuery(DataHubUsageEventConstants.TYPE, DataHubUsageEventType.ENTITY_VIEW_EVENT.getType())); source.query(query); From 68ae3bfc26837f9b26534e0bf565c1ad7fd0d252 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 7 Sep 2023 20:17:50 -0500 Subject: [PATCH 19/20] fix(spark-test): upgrade gradle and fix spark smoke test (#8777) --- .github/workflows/docker-unified.yml | 10 +- .github/workflows/spark-smoke-test.yml | 3 + docker/datahub-ingestion/Dockerfile | 6 +- docker/datahub-ingestion/Dockerfile-slim-only | 25 ++ docker/datahub-ingestion/build.gradle | 9 +- .../gradle/wrapper/gradle-wrapper.properties | 3 +- .../test-spark-lineage/gradlew | 272 +++++++++++------- .../test-spark-lineage/gradlew.bat | 38 +-- 8 files changed, 226 insertions(+), 140 deletions(-) create mode 100644 docker/datahub-ingestion/Dockerfile-slim-only diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 532669c44722c..963155ef09b1c 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -45,6 +45,7 @@ jobs: unique_slim_tag: ${{ steps.tag.outputs.unique_slim_tag }} unique_full_tag: ${{ steps.tag.outputs.unique_full_tag }} publish: ${{ steps.publish.outputs.publish }} + python_release_version: ${{ steps.tag.outputs.python_release_version }} steps: - name: Checkout uses: actions/checkout@v3 @@ -58,6 +59,7 @@ jobs: echo "unique_tag=$(get_unique_tag)" >> $GITHUB_OUTPUT echo "unique_slim_tag=$(get_unique_tag)-slim" >> $GITHUB_OUTPUT echo "unique_full_tag=$(get_unique_tag)-full" >> $GITHUB_OUTPUT + echo "python_release_version=$(get_python_docker_release_v)" >> $GITHUB_OUTPUT - name: Check whether publishing enabled id: publish env: @@ -573,7 +575,7 @@ jobs: with: image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head' }} - name: Build and push Slim Image - if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }} + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }} uses: ./.github/actions/docker-custom-build-and-push with: target: final @@ -582,6 +584,7 @@ jobs: 
build-args: | BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }} DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head' }} + RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }} APP_ENV=slim tags: ${{ needs.setup.outputs.slim_tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} @@ -655,7 +658,7 @@ jobs: with: image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_full_tag || 'head' }} - name: Build and push Full Image - if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }} + if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }} uses: ./.github/actions/docker-custom-build-and-push with: target: final @@ -664,6 +667,7 @@ jobs: build-args: | BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }} DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_full_tag || 'head' }} + RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }} tags: ${{ needs.setup.outputs.unique_full_tag }} username: ${{ secrets.ACRYL_DOCKER_USERNAME }} password: ${{ secrets.ACRYL_DOCKER_PASSWORD }} @@ -673,7 +677,7 @@ jobs: platforms: linux/amd64,linux/arm64/v8 - name: Compute Tag (Full) id: tag - run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}" >> $GITHUB_OUTPUT + run: echo "tag=${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && needs.setup.outputs.unique_full_tag || 'head' }}" >> $GITHUB_OUTPUT datahub_ingestion_full_scan: permissions: contents: read # for actions/checkout to fetch code diff --git a/.github/workflows/spark-smoke-test.yml b/.github/workflows/spark-smoke-test.yml index ac411d812deea..96b85826cac4f 100644 --- a/.github/workflows/spark-smoke-test.yml +++ b/.github/workflows/spark-smoke-test.yml @@ -30,6 +30,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + with: + fetch-depth: 800 + fetch-tags: true - name: Set up JDK 11 uses: actions/setup-java@v3 with: diff --git a/docker/datahub-ingestion/Dockerfile b/docker/datahub-ingestion/Dockerfile index 0ecc30d02ac3f..2ceff6a800ebb 100644 --- a/docker/datahub-ingestion/Dockerfile +++ b/docker/datahub-ingestion/Dockerfile @@ -7,10 +7,12 @@ FROM $BASE_IMAGE:$DOCKER_VERSION as base USER 0 COPY ./metadata-ingestion /datahub-ingestion +COPY ./metadata-ingestion-modules/airflow-plugin /datahub-ingestion/airflow-plugin ARG RELEASE_VERSION WORKDIR /datahub-ingestion RUN sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ + sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" airflow-plugin/src/datahub_airflow_plugin/__init__.py && \ cat src/datahub/__init__.py && \ chown -R datahub /datahub-ingestion @@ -21,7 +23,9 @@ FROM base as slim-install RUN pip install --no-cache --user ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]" FROM base as full-install -RUN pip install --no-cache --user ".[all]" +RUN pip install --no-cache --user ".[base]" && \ + pip install --no-cache --user "./airflow-plugin[acryl-datahub-airflow-plugin]" && \ + pip install 
--no-cache --user ".[all]" FROM base as dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. diff --git a/docker/datahub-ingestion/Dockerfile-slim-only b/docker/datahub-ingestion/Dockerfile-slim-only new file mode 100644 index 0000000000000..678bee7e306f6 --- /dev/null +++ b/docker/datahub-ingestion/Dockerfile-slim-only @@ -0,0 +1,25 @@ +# Defining environment +ARG BASE_IMAGE=acryldata/datahub-ingestion-base +ARG DOCKER_VERSION=latest + +FROM $BASE_IMAGE:$DOCKER_VERSION as base +USER 0 + +COPY ./metadata-ingestion /datahub-ingestion + +ARG RELEASE_VERSION +WORKDIR /datahub-ingestion +RUN sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \ + cat src/datahub/__init__.py && \ + chown -R datahub /datahub-ingestion + +USER datahub +ENV PATH="/datahub-ingestion/.local/bin:$PATH" + +FROM base as slim-install +RUN pip install --no-cache --user ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]" + +FROM slim-install as final + +USER datahub +ENV PATH="/datahub-ingestion/.local/bin:$PATH" diff --git a/docker/datahub-ingestion/build.gradle b/docker/datahub-ingestion/build.gradle index 22531c0c4fd0e..307594018c92e 100644 --- a/docker/datahub-ingestion/build.gradle +++ b/docker/datahub-ingestion/build.gradle @@ -19,18 +19,19 @@ dependencies { docker { name "${docker_registry}/${docker_repo}:v${version}-slim" version "v${version}-slim" - dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile") + dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile-slim-only") files fileTree(rootProject.projectDir) { include "docker/${docker_dir}/*" include "metadata-ingestion/**" + include "metadata-ingestion-modules/**" }.exclude { i -> i.file.isHidden() || i.file == buildDir || - i.file == project(':metadata-ingestion').buildDir + i.file == project(':metadata-ingestion').buildDir || + i.file == project(':metadata-ingestion-modules').buildDir } buildArgs([DOCKER_VERSION: version, - RELEASE_VERSION: version.replace('-SNAPSHOT', '').replace('v', '').replace('-slim', ''), - APP_ENV: 'slim']) + RELEASE_VERSION: version.replace('-SNAPSHOT', '').replace('v', '').replace('-slim', '')]) } tasks.getByName('docker').dependsOn(['build', ':docker:datahub-ingestion-base:docker', diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradle/wrapper/gradle-wrapper.properties b/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradle/wrapper/gradle-wrapper.properties index ec991f9aa12cb..4e86b9270786f 100644 --- a/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradle/wrapper/gradle-wrapper.properties +++ b/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,6 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-6.9.2-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-7.6.2-bin.zip +networkTimeout=10000 zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradlew b/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradlew index 83f2acfdc319a..65dcd68d65c82 100755 --- 
diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradlew b/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradlew
index 83f2acfdc319a..65dcd68d65c82 100755
--- a/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradlew
+++ b/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradlew
@@ -1,7 +1,7 @@
-#!/usr/bin/env sh
+#!/bin/sh
 
 #
-# Copyright 2015 the original author or authors.
+# Copyright © 2015-2021 the original authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,78 +17,113 @@
 #
 ##############################################################################
 
-##
-## Gradle start up script for UN*X
-##
+#
+#   Gradle start up script for POSIX generated by Gradle.
+#
+#   Important for running:
+#
+#   (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
+#       noncompliant, but you have some other compliant shell such as ksh or
+#       bash, then to run this script, type that shell name before the whole
+#       command line, like:
+#
+#           ksh Gradle
+#
+#       Busybox and similar reduced shells will NOT work, because this script
+#       requires all of these POSIX shell features:
+#         * functions;
+#         * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
+#           «${var#prefix}», «${var%suffix}», and «$( cmd )»;
+#         * compound commands having a testable exit status, especially «case»;
+#         * various built-in commands including «command», «set», and «ulimit».
+#
+#   Important for patching:
+#
+#   (2) This script targets any POSIX shell, so it avoids extensions provided
+#       by Bash, Ksh, etc; in particular arrays are avoided.
+#
+#       The "traditional" practice of packing multiple parameters into a
+#       space-separated string is a well documented source of bugs and security
+#       problems, so this is (mostly) avoided, by progressively accumulating
+#       options in "$@", and eventually passing that to Java.
+#
+#       Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
+#       and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
+#       see the in-line comments for details.
+#
+#       There are tweaks for specific operating systems such as AIX, CygWin,
+#       Darwin, MinGW, and NonStop.
+#
+#   (3) This script is generated from the Groovy template
+#       https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
+#       within the Gradle project.
+#
+#       You can find Gradle at https://github.com/gradle/gradle/.
+#
 ##############################################################################
 
 # Attempt to set APP_HOME
+
 # Resolve links: $0 may be a link
-PRG="$0"
-# Need this for relative symlinks.
-while [ -h "$PRG" ] ; do
-    ls=`ls -ld "$PRG"`
-    link=`expr "$ls" : '.*-> \(.*\)$'`
-    if expr "$link" : '/.*' > /dev/null; then
-        PRG="$link"
-    else
-        PRG=`dirname "$PRG"`"/$link"
-    fi
+app_path=$0
+
+# Need this for daisy-chained symlinks.
+while
+    APP_HOME=${app_path%"${app_path##*/}"}  # leaves a trailing /; empty if no leading path
+    [ -h "$app_path" ]
+do
+    ls=$( ls -ld "$app_path" )
+    link=${ls#*' -> '}
+    case $link in             #(
+      /*)   app_path=$link ;; #(
+      *)    app_path=$APP_HOME$link ;;
+    esac
 done
-SAVED="`pwd`"
-cd "`dirname \"$PRG\"`/" >/dev/null
-APP_HOME="`pwd -P`"
-cd "$SAVED" >/dev/null
 
-APP_NAME="Gradle"
-APP_BASE_NAME=`basename "$0"`
+# This is normally unused
+# shellcheck disable=SC2034
+APP_BASE_NAME=${0##*/}
+APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
 
 # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' # Use the maximum available, or set MAX_FD != -1 to use that value. -MAX_FD="maximum" +MAX_FD=maximum warn () { echo "$*" -} +} >&2 die () { echo echo "$*" echo exit 1 -} +} >&2 # OS specific support (must be 'true' or 'false'). cygwin=false msys=false darwin=false nonstop=false -case "`uname`" in - CYGWIN* ) - cygwin=true - ;; - Darwin* ) - darwin=true - ;; - MINGW* ) - msys=true - ;; - NONSTOP* ) - nonstop=true - ;; +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; esac CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + # Determine the Java command to use to start the JVM. if [ -n "$JAVA_HOME" ] ; then if [ -x "$JAVA_HOME/jre/sh/java" ] ; then # IBM's JDK on AIX uses strange locations for the executables - JAVACMD="$JAVA_HOME/jre/sh/java" + JAVACMD=$JAVA_HOME/jre/sh/java else - JAVACMD="$JAVA_HOME/bin/java" + JAVACMD=$JAVA_HOME/bin/java fi if [ ! -x "$JAVACMD" ] ; then die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME @@ -97,7 +132,7 @@ Please set the JAVA_HOME variable in your environment to match the location of your Java installation." fi else - JAVACMD="java" + JAVACMD=java which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. Please set the JAVA_HOME variable in your environment to match the @@ -105,84 +140,105 @@ location of your Java installation." fi # Increase the maximum file descriptors if we can. -if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then - MAX_FD_LIMIT=`ulimit -H -n` - if [ $? -eq 0 ] ; then - if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then - MAX_FD="$MAX_FD_LIMIT" - fi - ulimit -n $MAX_FD - if [ $? -ne 0 ] ; then - warn "Could not set maximum file descriptor limit: $MAX_FD" - fi - else - warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" - fi +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac fi -# For Darwin, add options to specify how the application appears in the dock -if $darwin; then - GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" -fi +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
 
 # For Cygwin or MSYS, switch paths to Windows format before running java
-if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
-    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
-    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
-    JAVACMD=`cygpath --unix "$JAVACMD"`
-
-    # We build the pattern for arguments to be converted via cygpath
-    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
-    SEP=""
-    for dir in $ROOTDIRSRAW ; do
-        ROOTDIRS="$ROOTDIRS$SEP$dir"
-        SEP="|"
-    done
-    OURCYGPATTERN="(^($ROOTDIRS))"
-    # Add a user-defined pattern to the cygpath arguments
-    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
-        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
-    fi
+if "$cygwin" || "$msys" ; then
+    APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
+    CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
+
+    JAVACMD=$( cygpath --unix "$JAVACMD" )
+
     # Now convert the arguments - kludge to limit ourselves to /bin/sh
-    i=0
-    for arg in "$@" ; do
-        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
-        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option
-
-        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
-            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
-        else
-            eval `echo args$i`="\"$arg\""
+    for arg do
+        if
+            case $arg in                                #(
+              -*)   false ;;                            # don't mess with options #(
+              /?*)  t=${arg#/} t=/${t%%/*}              # looks like a POSIX filepath
+                    [ -e "$t" ] ;;                      #(
+              *)    false ;;
+            esac
+        then
+            arg=$( cygpath --path --ignore --mixed "$arg" )
         fi
-        i=$((i+1))
+        # Roll the args list around exactly as many times as the number of
+        # args, so each arg winds up back in the position where it started, but
+        # possibly modified.
+        #
+        # NB: a `for` loop captures its iteration list before it begins, so
+        # changing the positional parameters here affects neither the number of
+        # iterations, nor the values presented in `arg`.
+        shift                   # remove old arg
+        set -- "$@" "$arg"      # push replacement arg
     done
-    case $i in
-        (0) set -- ;;
-        (1) set -- "$args0" ;;
-        (2) set -- "$args0" "$args1" ;;
-        (3) set -- "$args0" "$args1" "$args2" ;;
-        (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
-        (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
-        (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
-        (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
-        (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
-        (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
-    esac
 fi
 
-# Escape application args
-save () {
-    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
-    echo " "
-}
-APP_ARGS=$(save "$@")
+# Collect all arguments for the java command;
+#   * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
+#     shell script including quotes and variable substitutions, so put them in
+#     double quotes to make sure that they get re-expanded; and
+#   * put everything else in single quotes, so that it's not re-expanded.
+
+set -- \
+        "-Dorg.gradle.appname=$APP_BASE_NAME" \
+        -classpath "$CLASSPATH" \
+        org.gradle.wrapper.GradleWrapperMain \
+        "$@"
+
+# Stop when "xargs" is not available.
+if ! command -v xargs >/dev/null 2>&1
+then
+    die "xargs is not available"
+fi
 
-# Collect all arguments for the java command, following the shell quoting and substitution rules
-eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
+# Use "xargs" to parse quoted args.
+#
+# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
+#
+# In Bash we could simply go:
+#
+#   readarray ARGS < <( xargs -n1 <<<"$var" ) &&
+#   set -- "${ARGS[@]}" "$@"
+#
+# but POSIX shell has neither arrays nor command substitution, so instead we
+# post-process each arg (as a line of input to sed) to backslash-escape any
+# character that might be a shell metacharacter, then use eval to reverse
+# that process (while maintaining the separation between arguments), and wrap
+# the whole thing up as a single "set" statement.
+#
+# This will of course break if any of these variables contains a newline or
+# an unmatched quote.
+#
 
-# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
-if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
-    cd "$(dirname "$0")"
-fi
+eval "set -- $(
+        printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
+        xargs -n1 |
+        sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
+        tr '\n' ' '
+    )" '"$@"'
 
 exec "$JAVACMD" "$@"
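[The eval/xargs pattern at the end of the new script can be checked in isolation; a minimal sketch of the same technique, with illustrative option values:]

    # Split quoted JVM options the same way the generated gradlew does:
    DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
    eval "set -- $(
            printf '%s\n' "$DEFAULT_JVM_OPTS" |
            xargs -n1 |
            sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
            tr '\n' ' '
        )"
    printf '<%s>\n' "$@"   # prints <-Xmx64m> then <-Xms64m>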
diff --git a/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradlew.bat b/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradlew.bat
index 24467a141f791..6689b85beecde 100644
--- a/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradlew.bat
+++ b/metadata-integration/java/spark-lineage/spark-smoke-test/test-spark-lineage/gradlew.bat
@@ -14,7 +14,7 @@
 @rem limitations under the License.
 @rem
 
-@if "%DEBUG%" == "" @echo off
+@if "%DEBUG%"=="" @echo off
 @rem ##########################################################################
 @rem
 @rem  Gradle startup script for Windows
 @rem
@@ -25,10 +25,14 @@
 if "%OS%"=="Windows_NT" setlocal
 
 set DIRNAME=%~dp0
-if "%DIRNAME%" == "" set DIRNAME=.
+if "%DIRNAME%"=="" set DIRNAME=.
+@rem This is normally unused
 set APP_BASE_NAME=%~n0
 set APP_HOME=%DIRNAME%
 
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
 @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
 set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
 
@@ -37,7 +41,7 @@ if defined JAVA_HOME goto findJavaFromJavaHome
 
 set JAVA_EXE=java.exe
 %JAVA_EXE% -version >NUL 2>&1
-if "%ERRORLEVEL%" == "0" goto init
+if %ERRORLEVEL% equ 0 goto execute
 
 echo.
 echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
@@ -51,7 +55,7 @@ goto fail
 set JAVA_HOME=%JAVA_HOME:"=%
 set JAVA_EXE=%JAVA_HOME%/bin/java.exe
 
-if exist "%JAVA_EXE%" goto init
+if exist "%JAVA_EXE%" goto execute
 
 echo.
 echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
@@ -61,38 +65,26 @@ echo location of your Java installation.
 
 goto fail
 
-:init
-@rem Get command-line arguments, handling Windows variants
-
-if not "%OS%" == "Windows_NT" goto win9xME_args
-
-:win9xME_args
-@rem Slurp the command line arguments.
-set CMD_LINE_ARGS=
-set _SKIP=2
-
-:win9xME_args_slurp
-if "x%~1" == "x" goto execute
-
-set CMD_LINE_ARGS=%*
-
 :execute
 @rem Setup the command line
 
 set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
 
+
 @rem Execute Gradle
-"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
 
 :end
 @rem End local scope for the variables with windows NT shell
-if "%ERRORLEVEL%"=="0" goto mainEnd
+if %ERRORLEVEL% equ 0 goto mainEnd
 
 :fail
 rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
 rem the _cmd.exe /c_ return code!
-if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
-exit /b 1
+set EXIT_CODE=%ERRORLEVEL%
+if %EXIT_CODE% equ 0 set EXIT_CODE=1
+if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
+exit /b %EXIT_CODE%
 
 :mainEnd
 if "%OS%"=="Windows_NT" endlocal

From 75252a3d9f6a576904be5a0790d644b9ae2df6ac Mon Sep 17 00:00:00 2001
From: Ramendra761 <68153995+Ramendra761@users.noreply.github.com>
Date: Fri, 8 Sep 2023 23:10:49 +0530
Subject: [PATCH 20/20] feat(UI): AccessManagement UI to access the role
 metadata for a dataset (#8541)

Co-authored-by: Ramendra Srivastava
---
 .../graphql/featureflags/FeatureFlags.java    |   1 +
 .../resolvers/config/AppConfigResolver.java   |   1 +
 .../graphql/resolvers/search/SearchUtils.java |   4 +-
 .../src/main/resources/app.graphql            |   4 +
 .../src/main/resources/entity.graphql         |  10 +-
 datahub-web-react/src/App.tsx                 |   2 +
 .../src/app/entity/Access/RoleEntity.tsx      |  88 ++++++
 .../app/entity/Access/RoleEntityProfile.tsx   |  75 +++++
 .../src/app/entity/dataset/DatasetEntity.tsx  |  12 +
 .../AccessManagement/AccessManagement.tsx     | 115 ++++++++
 .../AccessManagerDescription.tsx              |  38 +++
 .../__tests__/AccessManagement.test.ts        | 267 ++++++++++++++++++
 .../tabs/Dataset/AccessManagement/utils.tsx   |  27 ++
 datahub-web-react/src/appConfigContext.tsx    |   1 +
 .../src/graphql/accessrole.graphql            |   8 +
 datahub-web-react/src/graphql/app.graphql     |   1 +
 datahub-web-react/src/graphql/dataset.graphql |  31 ++
 datahub-web-react/src/graphql/search.graphql  |  39 +++
 .../src/main/resources/application.yml        |   1 +
 19 files changed, 721 insertions(+), 4 deletions(-)
 create mode 100644 datahub-web-react/src/app/entity/Access/RoleEntity.tsx
 create mode 100644 datahub-web-react/src/app/entity/Access/RoleEntityProfile.tsx
 create mode 100644 datahub-web-react/src/app/entity/shared/tabs/Dataset/AccessManagement/AccessManagement.tsx
 create mode 100644 datahub-web-react/src/app/entity/shared/tabs/Dataset/AccessManagement/AccessManagerDescription.tsx
 create mode 100644 datahub-web-react/src/app/entity/shared/tabs/Dataset/AccessManagement/__tests__/AccessManagement.test.ts
 create mode 100644 datahub-web-react/src/app/entity/shared/tabs/Dataset/AccessManagement/utils.tsx
 create mode 100644 datahub-web-react/src/graphql/accessrole.graphql

diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java
index f813562945378..de3c217db01ec 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java
@@ -15,4 +15,5 @@ public class FeatureFlags {
   private boolean showBrowseV2 = false;
   private PreProcessHooks preProcessHooks;
   private boolean showAcrylInfo = false;
+  private boolean showAccessManagement = false;
 }
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java
index 90017f7b87997..09df985b19cf5 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java
@@ -171,6 +171,7 @@ public CompletableFuture<AppConfig> get(final DataFetchingEnvironment environme
         .setReadOnlyModeEnabled(_featureFlags.isReadOnlyModeEnabled())
         .setShowBrowseV2(_featureFlags.isShowBrowseV2())
         .setShowAcrylInfo(_featureFlags.isShowAcrylInfo())
+        .setShowAccessManagement(_featureFlags.isShowAccessManagement())
         .build();
 
     appConfig.setFeatureFlags(featureFlagsConfig);
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java
index fe5b79ba2ea3d..fb146ef72877d 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java
@@ -68,6 +68,7 @@ private SearchUtils() {
       EntityType.GLOSSARY_TERM,
       EntityType.GLOSSARY_NODE,
       EntityType.TAG,
+      EntityType.ROLE,
       EntityType.CORP_USER,
       EntityType.CORP_GROUP,
       EntityType.CONTAINER,
@@ -94,6 +95,7 @@ private SearchUtils() {
       EntityType.TAG,
       EntityType.CORP_USER,
       EntityType.CORP_GROUP,
+      EntityType.ROLE,
       EntityType.NOTEBOOK,
       EntityType.DATA_PRODUCT);
 
@@ -386,4 +388,4 @@ public static List<String> getEntityNames(List<EntityType> inputTypes) {
     List<EntityType> entityTypes =
         (inputTypes == null || inputTypes.isEmpty()) ? SEARCHABLE_ENTITY_TYPES : inputTypes;
     return entityTypes.stream().map(EntityTypeMapper::getName).collect(Collectors.toList());
-}
+}
\ No newline at end of file
diff --git a/datahub-graphql-core/src/main/resources/app.graphql b/datahub-graphql-core/src/main/resources/app.graphql
index dbee24b4bf6f7..a5057bcf644da 100644
--- a/datahub-graphql-core/src/main/resources/app.graphql
+++ b/datahub-graphql-core/src/main/resources/app.graphql
@@ -441,6 +441,10 @@ type FeatureFlagsConfig {
   Whether we should show CTAs in the UI related to moving to Managed DataHub by Acryl.
   """
   showAcrylInfo: Boolean!
+
+  """
+  Whether we should show AccessManagement tab in the datahub UI.
+  """
+  showAccessManagement: Boolean!
 }
diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql
index b1f9d57300177..044c405942a3c 100644
--- a/datahub-graphql-core/src/main/resources/entity.graphql
+++ b/datahub-graphql-core/src/main/resources/entity.graphql
@@ -68,6 +68,10 @@ type Query {
   Fetch a Tag by primary key (urn)
   """
   tag(urn: String!): Tag
+
+  """
+  Fetch a Role by primary key (urn)
+  """
+  role(urn: String!): Role
 
   """
   Fetch a Glossary Term by primary key (urn)
@@ -1451,12 +1455,12 @@ type Role implements Entity {
   """
   Role properties to include Request Access Url
   """
-  properties: RoleProperties!
+  properties: RoleProperties
 
   """
   A standard Entity Type
   """
-  actors: Actor!
+  actors: Actor
 }
 
@@ -11164,4 +11168,4 @@ input UpdateOwnershipTypeInput {
   The description of the Custom Ownership Type
   """
   description: String
-}
+}
\ No newline at end of file
diff --git a/datahub-web-react/src/App.tsx b/datahub-web-react/src/App.tsx
index 68a4b93d71481..b6bc608dccbbb 100644
--- a/datahub-web-react/src/App.tsx
+++ b/datahub-web-react/src/App.tsx
@@ -35,6 +35,7 @@ import GlossaryNodeEntity from './app/entity/glossaryNode/GlossaryNodeEntity';
 import { DataPlatformEntity } from './app/entity/dataPlatform/DataPlatformEntity';
 import { DataProductEntity } from './app/entity/dataProduct/DataProductEntity';
 import { DataPlatformInstanceEntity } from './app/entity/dataPlatformInstance/DataPlatformInstanceEntity';
+import { RoleEntity } from './app/entity/Access/RoleEntity';
 
 /*
    Construct Apollo Client
@@ -116,6 +117,7 @@ const App: React.VFC = () => {
     register.register(new DomainEntity());
     register.register(new ContainerEntity());
     register.register(new GlossaryNodeEntity());
+    register.register(new RoleEntity());
     register.register(new DataPlatformEntity());
     register.register(new DataProductEntity());
     register.register(new DataPlatformInstanceEntity());
diff --git a/datahub-web-react/src/app/entity/Access/RoleEntity.tsx b/datahub-web-react/src/app/entity/Access/RoleEntity.tsx
new file mode 100644
index 0000000000000..e63db9d0bbb2a
--- /dev/null
+++ b/datahub-web-react/src/app/entity/Access/RoleEntity.tsx
@@ -0,0 +1,88 @@
+import { TagOutlined, TagFilled } from '@ant-design/icons';
+import * as React from 'react';
+import styled from 'styled-components';
+import { Role, EntityType, SearchResult } from '../../../types.generated';
+import DefaultPreviewCard from '../../preview/DefaultPreviewCard';
+import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity';
+import { getDataForEntityType } from '../shared/containers/profile/utils';
+import { urlEncodeUrn } from '../shared/utils';
+import RoleEntityProfile from './RoleEntityProfile';
+
+const PreviewTagIcon = styled(TagOutlined)`
+    font-size: 20px;
+`;
+
+/**
+ * Definition of the DataHub Access Role entity.
+ */
+export class RoleEntity implements Entity<Role> {
+    type: EntityType = EntityType.Role;
+
+    icon = (fontSize: number, styleType: IconStyleType, color?: string) => {
+        if (styleType === IconStyleType.TAB_VIEW) {
+            return <TagOutlined style={{ fontSize, color }} />;
+        }
+
+        if (styleType === IconStyleType.HIGHLIGHT) {
+            return <TagFilled style={{ fontSize, color }} />;
+        }
+
+        return (
+            <TagOutlined
+                style={{
+                    fontSize,
+                    color,
+                }}
+            />
+        );
+    };
+
+    isSearchEnabled = () => true;
+
+    isBrowseEnabled = () => false;
+
+    isLineageEnabled = () => false;
+
+    getAutoCompleteFieldName = () => 'name';
+
+    getPathName: () => string = () => 'role';
+
+    getCollectionName: () => string = () => 'Roles';
+
+    getEntityName: () => string = () => 'Role';
+
+    renderProfile: (urn: string) => JSX.Element = (_) => <RoleEntityProfile />;
+
+    renderPreview = (_: PreviewType, data: Role) => (
+        <DefaultPreviewCard
+            url={`/${this.getPathName()}/${urlEncodeUrn(data.urn)}`}
+            name={this.displayName(data)}
+            urn={data.urn}
+            description={data.properties?.description || ''}
+            logoComponent={<PreviewTagIcon />}
+            type="Role"
+            typeIcon={this.icon(14, IconStyleType.ACCENT)}
+        />
+    );
+
+    renderSearch = (result: SearchResult) => {
+        return this.renderPreview(PreviewType.SEARCH, result.entity as Role);
+    };
+
+    displayName = (data: Role) => {
+        return data.properties?.name || data.urn;
+    };
+
+    getOverridePropertiesFromEntity = (data: Role) => {
+        return {
+            name: data.properties?.name,
+        };
+    };
+
+    getGenericEntityProperties = (role: Role) => {
+        return getDataForEntityType({ data: role, entityType: this.type, getOverrideProperties: (data) => data });
+    };
+
+    supportedCapabilities = () => {
+        return new Set([EntityCapabilityType.OWNERS]);
+    };
+}
diff --git a/datahub-web-react/src/app/entity/Access/RoleEntityProfile.tsx b/datahub-web-react/src/app/entity/Access/RoleEntityProfile.tsx
new file mode 100644
index 0000000000000..d8a31700fb918
--- /dev/null
+++ b/datahub-web-react/src/app/entity/Access/RoleEntityProfile.tsx
@@ -0,0 +1,75 @@
+import React from 'react';
+
+import { useParams } from 'react-router';
+import { Divider, Typography } from 'antd';
+import { grey } from '@ant-design/colors';
+import styled from 'styled-components';
+
+import { Message } from '../../shared/Message';
+import { decodeUrn } from '../shared/utils';
+import { useGetExternalRoleQuery } from '../../../graphql/accessrole.generated';
+
+const PageContainer = styled.div`
+    padding: 32px 100px;
+`;
+
+const LoadingMessage = styled(Message)`
+    margin-top: 10%;
+`;
+
+type RolePageParams = {
+    urn: string;
+};
+
+const TitleLabel = styled(Typography.Text)`
+    &&& {
+        color: ${grey[2]};
+        font-size: 12px;
+        display: block;
+        line-height: 20px;
+        font-weight: 700;
+    }
+`;
+
+const DescriptionLabel = styled(Typography.Text)`
+    &&& {
+        text-align: left;
+        font-weight: bold;
+        font-size: 14px;
+        line-height: 28px;
+        color: rgb(38, 38, 38);
+    }
+`;
+
+const TitleText = styled(Typography.Text)`
+    &&& {
+        color: ${grey[10]};
+        font-weight: 700;
+        font-size: 20px;
+        line-height: 28px;
+        display: inline-block;
+        margin: 0px 7px;
+    }
+`;
+
+const { Paragraph } = Typography;
+
+export default function RoleEntityProfile() {
+    const { urn: encodedUrn } = useParams<RolePageParams>();
+    const urn = decodeUrn(encodedUrn);
+    const { data, loading } = useGetExternalRoleQuery({ variables: { urn } });
+
+    return (
+        <PageContainer>
+            {loading && <LoadingMessage type="loading" content="Loading..." />}
+            <TitleLabel>Role</TitleLabel>
+            <TitleText>{data?.role?.properties?.name}</TitleText>
+            <Divider />
+            {/* Role Description */}
+            <DescriptionLabel>About</DescriptionLabel>
+            <Paragraph>
+                {data?.role?.properties?.description}
+            </Paragraph>
+        </PageContainer>
+    );
+}
diff --git a/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx b/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx
index 535a3f569964c..7d40b97a66b3b 100644
--- a/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx
+++ b/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx
@@ -2,6 +2,7 @@ import * as React from 'react';
 import { DatabaseFilled, DatabaseOutlined } from '@ant-design/icons';
 import { Dataset, DatasetProperties, EntityType, OwnershipType, SearchResult } from '../../../types.generated';
 import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity';
+import { useAppConfig } from '../../useAppConfig';
 import { Preview } from './preview/Preview';
 import { EntityProfile } from '../shared/containers/profile/EntityProfile';
 import { GetDatasetQuery, useGetDatasetQuery, useUpdateDatasetMutation } from '../../../graphql/dataset.generated';
@@ -30,6 +31,7 @@ import { EmbedTab } from '../shared/tabs/Embed/EmbedTab';
 import EmbeddedProfile from '../shared/embed/EmbeddedProfile';
 import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection';
 import { getDataProduct } from '../shared/utils';
+import AccessManagement from '../shared/tabs/Dataset/AccessManagement/AccessManagement';
 import { matchedFieldPathsRenderer } from '../../search/matches/matchedFieldPathsRenderer';
 
 const SUBTYPES = {
@@ -69,6 +71,8 @@ export class DatasetEntity implements Entity<Dataset> {
     isSearchEnabled = () => true;
 
+    appconfig = useAppConfig;
+
     isBrowseEnabled = () => true;
 
     isLineageEnabled = () => true;
@@ -176,6 +180,14 @@ export class DatasetEntity implements Entity<Dataset> {
                         },
                     },
                 },
+                {
+                    name: 'Access Management',
+                    component: AccessManagement,
+                    display: {
+                        visible: (_, _1) => this.appconfig().config.featureFlags.showAccessManagement,
+                        enabled: (_, _2) => true,
+                    },
+                },
             ]}
             sidebarSections={[
                 {
diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/AccessManagement/AccessManagement.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/AccessManagement/AccessManagement.tsx
new file mode 100644
index 0000000000000..c812569367419
--- /dev/null
+++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/AccessManagement/AccessManagement.tsx
@@ -0,0 +1,115 @@
+import React from 'react';
+import styled from 'styled-components';
+import { Button, Table } from 'antd';
+import { useBaseEntity } from '../../../EntityContext';
+import { GetDatasetQuery, useGetExternalRolesQuery } from '../../../../../../graphql/dataset.generated';
+import { useGetMeQuery } from '../../../../../../graphql/me.generated';
+import { handleAccessRoles } from './utils';
+import AccessManagerDescription from './AccessManagerDescription';
+
+const StyledTable = styled(Table)`
+    overflow: inherit;
+    height: inherit;
+
+    &&& .ant-table-cell {
+        background-color: #fff;
+    }
+    &&& .ant-table-thead .ant-table-cell {
+        font-weight: 600;
+        font-size: 12px;
+        color: '#898989';
+    }
+    &&
+        .ant-table-thead
+        > tr
+        > th:not(:last-child):not(.ant-table-selection-column):not(.ant-table-row-expand-icon-cell):not([colspan])::before {
+        border: 1px solid #f0f0f0;
+    }
+` as typeof Table;
+
+const StyledSection = styled.section`
+    background-color: #fff;
+    color: black;
+    width: 83px;
+    text-align: center;
+    border-radius: 3px;
+    border: none;
+    font-weight: bold;
+`;
+
+const AccessButton = styled(Button)`
+    background-color: #1890ff;
+    color: white;
+    width: 80px;
+    height: 30px;
+    border-radius: 3.5px;
+    border: none;
+    font-weight: bold;
+    &:hover {
+        background-color: #18baff;
+        color: white;
+        width: 80px;
+        height: 30px;
+        border-radius: 3.5px;
+        border: none;
+        font-weight: bold;
+    }
+`;
+
+export default function AccessManagement() {
+    const { data: loggedInUser } = useGetMeQuery({ fetchPolicy: 'cache-first' });
+    const baseEntity = useBaseEntity<GetDatasetQuery>();
+    const { data: externalRoles } = useGetExternalRolesQuery({
+        variables: { urn: baseEntity?.dataset?.urn as string },
+        skip: !baseEntity?.dataset?.urn,
+    });
+
+    const columns = [
+        {
+            title: 'Role Name',
+            dataIndex: 'name',
+            key: 'name',
+        },
+        {
+            title: 'Description',
+            dataIndex: 'description',
+            key: 'description',
+            render: (roleDescription) => {
+                return <AccessManagerDescription description={roleDescription} />;
+            },
+        },
+        {
+            title: 'Access Type',
+            dataIndex: 'accessType',
+            key: 'accessType',
+        },
+        {
+            title: 'Access',
+            dataIndex: 'hasAccess',
+            key: 'hasAccess',
+            render: (hasAccess, record) => {
+                if (hasAccess) {
+                    return <StyledSection>Provisioned</StyledSection>;
+                }
+                if (record?.url) {
+                    return (
+                        <AccessButton
+                            onClick={(e) => {
+                                e.preventDefault();
+                                window.open(record.url);
+                            }}
+                        >
+                            Request
+                        </AccessButton>
+                    );
+                }
+                return <StyledSection />;
+            },
+            hidden: true,
+        },
+    ];
+
+    return (
+        <StyledTable
+            dataSource={handleAccessRoles(externalRoles, loggedInUser)}
+            columns={columns}
+            pagination={false}
+        />
+    );
+}
diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/AccessManagement/AccessManagerDescription.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/AccessManagement/AccessManagerDescription.tsx
new file mode 100644
index 0000000000000..c87a499e34ac0
--- /dev/null
+++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/AccessManagement/AccessManagerDescription.tsx
@@ -0,0 +1,38 @@
+import React, { useState } from 'react';
+import styled from 'styled-components';
+import { Typography } from 'antd';
+
+export type Props = {
+    description: any;
+};
+
+const DescriptionContainer = styled.div`
+    position: relative;
+    display: flex;
+    flex-direction: column;
+    width: 500px;
+    height: 100%;
+    min-height: 22px;
+`;
+
+export default function AccessManagerDescription({ description }: Props) {
+    const shouldTruncateDescription = description.length > 150;
+    const [expanded, setIsExpanded] = useState(!shouldTruncateDescription);
+    const finalDescription = expanded ? description : description.slice(0, 150);
+    const toggleExpanded = () => {
+        setIsExpanded(!expanded);
+    };
+
+    return (
+        <DescriptionContainer>
+            {finalDescription}
+            <Typography.Link
+                onClick={() => {
+                    toggleExpanded();
+                }}
+            >
+                {(shouldTruncateDescription && (expanded ? ' Read Less' : '...Read More')) || undefined}
+            </Typography.Link>
+        </DescriptionContainer>
+    );
+}
diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/AccessManagement/__tests__/AccessManagement.test.ts b/datahub-web-react/src/app/entity/shared/tabs/Dataset/AccessManagement/__tests__/AccessManagement.test.ts
new file mode 100644
index 0000000000000..53c7b483d9428
--- /dev/null
+++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/AccessManagement/__tests__/AccessManagement.test.ts
@@ -0,0 +1,267 @@
+import { handleAccessRoles } from '../utils';
+import { GetExternalRolesQuery } from '../../../../../../../graphql/dataset.generated';
+import { GetMeQuery } from '../../../../../../../graphql/me.generated';
+
+describe('handleAccessRoles', () => {
+    it('should properly map the externalroles and loggedin user', () => {
+        const externalRolesQuery: GetExternalRolesQuery = {
+            dataset: {
+                access: {
+                    roles: [
+                        {
+                            role: {
+                                id: 'accessRole',
+                                properties: {
+                                    name: 'accessRole',
+                                    description:
+                                        'This role access is required by the developers to test and deploy the code also adding few more details to check the description length for the given data and hence check the condition of read more and read less ',
+                                    type: 'READ',
+                                    requestUrl: 'https://www.google.com/',
+                                },
+                                urn: 'urn:li:role:accessRole',
+                                actors: {
+                                    users: null,
+                                },
+                            },
+                        },
+                    ],
+                },
+                __typename: 'Dataset',
+            },
+        };
+
+        const GetMeQueryUser: GetMeQuery = {
+            me: {
+                corpUser: {
+                    urn: 'urn:li:corpuser:datahub',
+                    username: 'datahub',
+                    info: {
+                        active: true,
+                        displayName: 'DataHub',
+                        title: 'DataHub Root User',
+                        firstName: null,
+                        lastName: null,
+                        fullName: null,
+                        email: null,
+                        __typename: 'CorpUserInfo',
+                    },
+                    editableProperties: {
+                        displayName: null,
+                        title: null,
+                        pictureLink:
+                            'https://raw.githubusercontent.com/datahub-project/datahub/master/datahub-web-react/src/images/default_avatar.png',
+                        teams: [],
+                        skills: [],
+                        __typename: 'CorpUserEditableProperties',
+                    },
+                    settings: {
+                        appearance: {
+                            showSimplifiedHomepage: false,
+                            __typename: 'CorpUserAppearanceSettings',
+                        },
+                        views: null,
+                        __typename: 'CorpUserSettings',
+                    },
+                    __typename: 'CorpUser',
+                },
+                platformPrivileges: {
+                    viewAnalytics: true,
+                    managePolicies: true,
+                    manageIdentities: true,
+                    generatePersonalAccessTokens: true,
+                    manageIngestion: true,
+                    manageSecrets: true,
+                    manageDomains: true,
+                    manageTests: true,
+                    manageGlossaries: true,
+                    manageUserCredentials: true,
+                    manageTags: true,
+                    createDomains: true,
+                    createTags: true,
+                    manageGlobalViews: true,
+                    manageOwnershipTypes: true,
+                    __typename: 'PlatformPrivileges',
+                },
+                __typename: 'AuthenticatedUser',
+            },
+        };
+        const externalRole = handleAccessRoles(externalRolesQuery, GetMeQueryUser);
+        expect(externalRole).toMatchObject([
+            {
+                name: 'accessRole',
+                description:
+                    'This role access is required by the developers to test and deploy the code also adding few more details to check the description length for the given data and hence check the condition of read more and read less ',
+                accessType: 'READ',
+                hasAccess: false,
+                url: 'https://www.google.com/',
+            },
+        ]);
+    });
+    it('should return empty array', () => {
+        const externalRolesQuery: GetExternalRolesQuery = {
+            dataset: {
+                access: null,
+                __typename: 'Dataset',
+            },
+        };
+
+        const GetMeQueryUser: GetMeQuery = {
+            me: {
+                corpUser: {
+                    urn: 'urn:li:corpuser:datahub',
+                    username: 'datahub',
+                    info: {
+                        active: true,
+                        displayName: 'DataHub',
+                        title: 'DataHub Root User',
+                        firstName: null,
+                        lastName: null,
+                        fullName: null,
+                        email: null,
+                        __typename: 'CorpUserInfo',
+                    },
+                    editableProperties: {
+                        displayName: null,
+                        title: null,
+                        pictureLink:
+                            'https://raw.githubusercontent.com/datahub-project/datahub/master/datahub-web-react/src/images/default_avatar.png',
+                        teams: [],
+                        skills: [],
+                        __typename: 'CorpUserEditableProperties',
+                    },
+                    settings: {
+                        appearance: {
+                            showSimplifiedHomepage: false,
+                            __typename: 'CorpUserAppearanceSettings',
+                        },
+                        views: null,
+                        __typename: 'CorpUserSettings',
+                    },
+                    __typename: 'CorpUser',
+                },
+                platformPrivileges: {
+                    viewAnalytics: true,
+                    managePolicies: true,
+                    manageIdentities: true,
+                    generatePersonalAccessTokens: true,
+                    manageIngestion: true,
+                    manageSecrets: true,
+                    manageDomains: true,
+                    manageTests: true,
+                    manageGlossaries: true,
+                    manageUserCredentials: true,
+                    manageTags: true,
+                    createDomains: true,
+                    createTags: true,
+                    manageGlobalViews: true,
+                    manageOwnershipTypes: true,
+                    __typename: 'PlatformPrivileges',
+                },
+                __typename: 'AuthenticatedUser',
+            },
+        };
+        const externalRole = handleAccessRoles(externalRolesQuery, GetMeQueryUser);
+        expect(externalRole).toMatchObject([]);
+    });
+    it('should properly map the externalroles and loggedin user and access true', () => {
+        const externalRolesQuery: GetExternalRolesQuery = {
+            dataset: {
+                access: {
+                    roles: [
+                        {
+                            role: {
+                                id: 'accessRole',
+                                properties: {
+                                    name: 'accessRole',
+                                    description:
+                                        'This role access is required by the developers to test and deploy the code also adding few more details to check the description length for the given data and hence check the condition of read more and read less ',
+                                    type: 'READ',
+                                    requestUrl: 'https://www.google.com/',
+                                },
+                                urn: 'urn:li:role:accessRole',
+                                actors: {
+                                    users: [
+                                        {
+                                            user: {
+                                                urn: 'urn:li:corpuser:datahub',
+                                            },
+                                        },
+                                    ],
+                                },
+                            },
+                        },
+                    ],
+                },
+                __typename: 'Dataset',
+            },
+        };
+
+        const GetMeQueryUser: GetMeQuery = {
+            me: {
+                corpUser: {
+                    urn: 'urn:li:corpuser:datahub',
+                    username: 'datahub',
+                    info: {
+                        active: true,
+                        displayName: 'DataHub',
+                        title: 'DataHub Root User',
+                        firstName: null,
+                        lastName: null,
+                        fullName: null,
+                        email: null,
+                        __typename: 'CorpUserInfo',
+                    },
+                    editableProperties: {
+                        displayName: null,
+                        title: null,
+                        pictureLink:
+                            'https://raw.githubusercontent.com/datahub-project/datahub/master/datahub-web-react/src/images/default_avatar.png',
+                        teams: [],
+                        skills: [],
+                        __typename: 'CorpUserEditableProperties',
+                    },
+                    settings: {
+                        appearance: {
+                            showSimplifiedHomepage: false,
+                            __typename: 'CorpUserAppearanceSettings',
+                        },
+                        views: null,
+                        __typename: 'CorpUserSettings',
+                    },
+                    __typename: 'CorpUser',
+                },
+                platformPrivileges: {
+                    viewAnalytics: true,
+                    managePolicies: true,
+                    manageIdentities: true,
+                    generatePersonalAccessTokens: true,
+                    manageIngestion: true,
+                    manageSecrets: true,
+                    manageDomains: true,
+                    manageTests: true,
+                    manageGlossaries: true,
+                    manageUserCredentials: true,
+                    manageTags: true,
+                    createDomains: true,
+                    createTags: true,
+                    manageGlobalViews: true,
+                    manageOwnershipTypes: true,
+                    __typename: 'PlatformPrivileges',
+                },
+                __typename: 'AuthenticatedUser',
+            },
+        };
+        const externalRole = handleAccessRoles(externalRolesQuery, GetMeQueryUser);
+
+        expect(externalRole).toMatchObject([
+            {
+                name: 'accessRole',
+                description:
+                    'This role access is required by the developers to test and deploy the code also adding few more details to check the description length for the given data and hence check the condition of read more and read less ',
+                accessType: 'READ',
+                hasAccess: true,
+                url: 'https://www.google.com/',
+            },
+        ]);
+    });
+});
diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/AccessManagement/utils.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/AccessManagement/utils.tsx
new file mode 100644
index 0000000000000..71e81e8d7de93
--- /dev/null
+++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/AccessManagement/utils.tsx
@@ -0,0 +1,27 @@
+export function handleAccessRoles(externalRoles, loggedInUser) {
+    const accessRoles = new Array();
+    if (
+        externalRoles?.dataset?.access &&
+        externalRoles?.dataset?.access.roles &&
+        externalRoles?.dataset?.access.roles.length > 0
+    ) {
+        externalRoles?.dataset?.access?.roles?.forEach((userRoles) => {
+            const role = {
+                name: userRoles?.role?.properties?.name || ' ',
+                description: userRoles?.role?.properties?.description || ' ',
+                accessType: userRoles?.role?.properties?.type || ' ',
+                hasAccess:
+                    (userRoles?.role?.actors?.users &&
+                        userRoles?.role?.actors?.users.length > 0 &&
+                        userRoles?.role?.actors?.users?.some(
+                            (user) => user.user.urn === loggedInUser?.me?.corpUser.urn,
+                        )) ||
+                    false,
+                url: userRoles?.role?.properties?.requestUrl || undefined,
+            };
+            accessRoles.push(role);
+        });
+    }
+
+    return accessRoles;
+}
diff --git a/datahub-web-react/src/appConfigContext.tsx b/datahub-web-react/src/appConfigContext.tsx
index 807a17c4fd6a4..096c2fd6ef0e5 100644
--- a/datahub-web-react/src/appConfigContext.tsx
+++ b/datahub-web-react/src/appConfigContext.tsx
@@ -48,6 +48,7 @@ export const DEFAULT_APP_CONFIG = {
         showSearchFiltersV2: true,
         showBrowseV2: true,
         showAcrylInfo: false,
+        showAccessManagement: false,
     },
 };
diff --git a/datahub-web-react/src/graphql/accessrole.graphql b/datahub-web-react/src/graphql/accessrole.graphql
new file mode 100644
index 0000000000000..ccc7d3496ad6d
--- /dev/null
+++ b/datahub-web-react/src/graphql/accessrole.graphql
@@ -0,0 +1,8 @@
+query getExternalRole($urn: String!) {
+    role(urn: $urn) {
+        properties {
+            name
+            description
+        }
+    }
+}
\ No newline at end of file
diff --git a/datahub-web-react/src/graphql/app.graphql b/datahub-web-react/src/graphql/app.graphql
index bf15e5f757f8f..228fa1c9430d0 100644
--- a/datahub-web-react/src/graphql/app.graphql
+++ b/datahub-web-react/src/graphql/app.graphql
@@ -63,6 +63,7 @@ query appConfig {
             showSearchFiltersV2
             showBrowseV2
             showAcrylInfo
+            showAccessManagement
         }
     }
 }
diff --git a/datahub-web-react/src/graphql/dataset.graphql b/datahub-web-react/src/graphql/dataset.graphql
index c79c1a4d9d551..658ce2b47c567 100644
--- a/datahub-web-react/src/graphql/dataset.graphql
+++ b/datahub-web-react/src/graphql/dataset.graphql
@@ -311,3 +311,34 @@ query getDatasetSchema($urn: String!) {
         }
     }
 }
+
+query getExternalRoles($urn: String!) {
+    dataset(urn: $urn) {
+        access {
+            ...getRoles
+        }
+        __typename
+    }
+}
+
+fragment getRoles on Access {
+    roles {
+        role {
+            id
+            properties {
+                name
+                description
+                type
+                requestUrl
+            }
+            urn
+            actors {
+                users {
+                    user {
+                        urn
+                    }
+                }
+            }
+        }
+    }
+}
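[Once these documents are compiled, the new getExternalRoles query can be smoke-tested against a running instance; a hedged sketch only — the endpoint, token, and dataset urn below are placeholders, not values from this patch:]

    # Assumes a local frontend at :9002 and a personal access token.
    curl -s 'http://localhost:9002/api/v2/graphql' \
        -H 'Authorization: Bearer <token>' \
        -H 'Content-Type: application/json' \
        --data '{"query": "query { dataset(urn: \"urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)\") { access { roles { role { urn } } } } }"}'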
diff --git a/datahub-web-react/src/graphql/search.graphql b/datahub-web-react/src/graphql/search.graphql
index 7cd868d7cd2b2..94ff263c02039 100644
--- a/datahub-web-react/src/graphql/search.graphql
+++ b/datahub-web-react/src/graphql/search.graphql
@@ -44,6 +44,16 @@ fragment autoCompleteFields on Entity {
             }
         }
         ...datasetStatsFields
+        access {
+            ...getAccess
+        }
+    }
+    ... on Role {
+        id
+        properties {
+            name
+            description
+        }
     }
     ... on CorpUser {
         username
@@ -242,6 +252,25 @@ query getAutoCompleteMultipleResults($input: AutoCompleteMultipleInput!) {
     }
 }
 
+fragment getAccess on Access {
+    roles {
+        role {
+            ...getRolesName
+        }
+    }
+}
+
+fragment getRolesName on Role {
+    urn
+    type
+    id
+    properties {
+        name
+        description
+        type
+    }
+}
+
 fragment datasetStatsFields on Dataset {
     lastProfile: datasetProfiles(limit: 1) {
         rowCount
@@ -288,6 +317,9 @@ fragment nonSiblingsDatasetSearchFields on Dataset {
     editableProperties {
         description
     }
+    access {
+        ...getAccess
+    }
     platformNativeType
     properties {
         name
@@ -346,6 +378,13 @@ fragment searchResultFields on Entity {
             }
         }
     }
+    ... on Role {
+        id
+        properties {
+            name
+            description
+        }
+    }
     ... on CorpUser {
         username
         properties {
diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml
index f49498bfa2325..6fd7b9e6a295c 100644
--- a/metadata-service/configuration/src/main/resources/application.yml
+++ b/metadata-service/configuration/src/main/resources/application.yml
@@ -294,6 +294,7 @@ featureFlags:
   alwaysEmitChangeLog: ${ALWAYS_EMIT_CHANGE_LOG:false} # Enables always emitting a MCL even when no changes are detected. Used for Time Based Lineage when no changes occur.
   searchServiceDiffModeEnabled: ${SEARCH_SERVICE_DIFF_MODE_ENABLED:true} # Enables diff mode for search document writes, reduces amount of writes to ElasticSearch documents for no-ops
   readOnlyModeEnabled: ${READ_ONLY_MODE_ENABLED:false} # Enables read only mode for an instance. Right now this only affects ability to edit user profile image URL but can be extended
+  showAccessManagement: ${SHOW_ACCESS_MANAGEMENT:false} # Whether we should show AccessManagement tab in the datahub UI.
   showSearchFiltersV2: ${SHOW_SEARCH_FILTERS_V2:true} # Enables showing the search filters V2 experience.
   showBrowseV2: ${SHOW_BROWSE_V2:true} # Enables showing the browse v2 sidebar experience.
   preProcessHooks:
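[As with the neighboring flags, the Access Management tab stays hidden until the flag above is enabled on the metadata service; a deployment-specific sketch:]

    # e.g. exported into the datahub-gms container environment before startup
    export SHOW_ACCESS_MANAGEMENT=true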