diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index b1fa2a71f4084..4ee995085f91e 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -19,6 +19,11 @@ jobs: uses: actions/setup-java@v1 with: java-version: 1.8 + - uses: actions/setup-python@v2 + with: + python-version: 3.9.9 + - name: Install Python dependencies + run: ./metadata-ingestion/scripts/install_deps.sh - name: Build Docs run: | ./gradlew --info docs-website:build diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 0bd8c791dab8a..6614d9f0de565 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -5,6 +5,7 @@ import com.linkedin.datahub.graphql.analytics.resolver.AnalyticsChartTypeResolver; import com.linkedin.datahub.graphql.analytics.resolver.GetChartsResolver; import com.linkedin.datahub.graphql.analytics.resolver.GetHighlightsResolver; +import com.linkedin.datahub.graphql.analytics.resolver.GetMetadataAnalyticsResolver; import com.linkedin.datahub.graphql.analytics.resolver.IsAnalyticsEnabledResolver; import com.linkedin.datahub.graphql.analytics.service.AnalyticsService; import com.linkedin.datahub.graphql.generated.AggregationMetadata; @@ -26,8 +27,8 @@ import com.linkedin.datahub.graphql.generated.EntityRelationship; import com.linkedin.datahub.graphql.generated.EntityRelationshipLegacy; import com.linkedin.datahub.graphql.generated.ForeignKeyConstraint; -import com.linkedin.datahub.graphql.generated.ListDomainsResult; import com.linkedin.datahub.graphql.generated.InstitutionalMemoryMetadata; +import com.linkedin.datahub.graphql.generated.ListDomainsResult; import com.linkedin.datahub.graphql.generated.MLFeature; import 
com.linkedin.datahub.graphql.generated.MLFeatureProperties; import com.linkedin.datahub.graphql.generated.MLFeatureTable; @@ -45,10 +46,10 @@ import com.linkedin.datahub.graphql.resolvers.AuthenticatedResolver; import com.linkedin.datahub.graphql.resolvers.MeResolver; import com.linkedin.datahub.graphql.resolvers.auth.GetAccessTokenResolver; -import com.linkedin.datahub.graphql.resolvers.container.ContainerEntitiesResolver; import com.linkedin.datahub.graphql.resolvers.browse.BrowsePathsResolver; import com.linkedin.datahub.graphql.resolvers.browse.BrowseResolver; import com.linkedin.datahub.graphql.resolvers.config.AppConfigResolver; +import com.linkedin.datahub.graphql.resolvers.container.ContainerEntitiesResolver; import com.linkedin.datahub.graphql.resolvers.deprecation.UpdateDeprecationResolver; import com.linkedin.datahub.graphql.resolvers.domain.CreateDomainResolver; import com.linkedin.datahub.graphql.resolvers.domain.DomainEntitiesResolver; @@ -61,18 +62,18 @@ import com.linkedin.datahub.graphql.resolvers.group.ListGroupsResolver; import com.linkedin.datahub.graphql.resolvers.group.RemoveGroupMembersResolver; import com.linkedin.datahub.graphql.resolvers.group.RemoveGroupResolver; -import com.linkedin.datahub.graphql.resolvers.group.UpdateUserStatusResolver; +import com.linkedin.datahub.graphql.resolvers.user.UpdateUserStatusResolver; import com.linkedin.datahub.graphql.resolvers.ingest.execution.CancelIngestionExecutionRequestResolver; import com.linkedin.datahub.graphql.resolvers.ingest.execution.CreateIngestionExecutionRequestResolver; +import com.linkedin.datahub.graphql.resolvers.ingest.execution.GetIngestionExecutionRequestResolver; +import com.linkedin.datahub.graphql.resolvers.ingest.execution.IngestionSourceExecutionRequestsResolver; import com.linkedin.datahub.graphql.resolvers.ingest.secret.CreateSecretResolver; -import com.linkedin.datahub.graphql.resolvers.ingest.source.DeleteIngestionSourceResolver; import 
com.linkedin.datahub.graphql.resolvers.ingest.secret.DeleteSecretResolver; -import com.linkedin.datahub.graphql.resolvers.ingest.execution.GetIngestionExecutionRequestResolver; -import com.linkedin.datahub.graphql.resolvers.ingest.source.GetIngestionSourceResolver; import com.linkedin.datahub.graphql.resolvers.ingest.secret.GetSecretValuesResolver; -import com.linkedin.datahub.graphql.resolvers.ingest.execution.IngestionSourceExecutionRequestsResolver; -import com.linkedin.datahub.graphql.resolvers.ingest.source.ListIngestionSourcesResolver; import com.linkedin.datahub.graphql.resolvers.ingest.secret.ListSecretsResolver; +import com.linkedin.datahub.graphql.resolvers.ingest.source.DeleteIngestionSourceResolver; +import com.linkedin.datahub.graphql.resolvers.ingest.source.GetIngestionSourceResolver; +import com.linkedin.datahub.graphql.resolvers.ingest.source.ListIngestionSourcesResolver; import com.linkedin.datahub.graphql.resolvers.ingest.source.UpsertIngestionSourceResolver; import com.linkedin.datahub.graphql.resolvers.load.AspectResolver; import com.linkedin.datahub.graphql.resolvers.load.EntityRelationshipsResultResolver; @@ -116,10 +117,10 @@ import com.linkedin.datahub.graphql.types.SearchableEntityType; import com.linkedin.datahub.graphql.types.aspect.AspectType; import com.linkedin.datahub.graphql.types.chart.ChartType; -import com.linkedin.datahub.graphql.types.container.ContainerType; -import com.linkedin.datahub.graphql.types.corpuser.CorpUserType; import com.linkedin.datahub.graphql.types.common.mappers.OperationMapper; +import com.linkedin.datahub.graphql.types.container.ContainerType; import com.linkedin.datahub.graphql.types.corpgroup.CorpGroupType; +import com.linkedin.datahub.graphql.types.corpuser.CorpUserType; import com.linkedin.datahub.graphql.types.dashboard.DashboardType; import com.linkedin.datahub.graphql.types.dataflow.DataFlowType; import com.linkedin.datahub.graphql.types.datajob.DataJobType; @@ -164,8 +165,15 @@ import 
org.dataloader.DataLoader; import org.dataloader.DataLoaderOptions; -import static com.linkedin.datahub.graphql.Constants.*; -import static graphql.Scalars.*; +import static com.linkedin.datahub.graphql.Constants.ANALYTICS_SCHEMA_FILE; +import static com.linkedin.datahub.graphql.Constants.APP_SCHEMA_FILE; +import static com.linkedin.datahub.graphql.Constants.AUTH_SCHEMA_FILE; +import static com.linkedin.datahub.graphql.Constants.GMS_SCHEMA_FILE; +import static com.linkedin.datahub.graphql.Constants.INGESTION_SCHEMA_FILE; +import static com.linkedin.datahub.graphql.Constants.RECOMMENDATIONS_SCHEMA_FILE; +import static com.linkedin.datahub.graphql.Constants.SEARCH_SCHEMA_FILE; +import static com.linkedin.datahub.graphql.Constants.URN_FIELD_NAME; +import static graphql.Scalars.GraphQLLong; /** * A {@link GraphQLEngine} configured to provide access to the entities and aspects on the the GMS graph. @@ -469,9 +477,10 @@ private void configureAnalyticsResolvers(final RuntimeWiring.Builder builder) { builder.type("Query", typeWiring -> typeWiring.dataFetcher("isAnalyticsEnabled", new IsAnalyticsEnabledResolver(isAnalyticsEnabled))) .type("AnalyticsChart", typeWiring -> typeWiring.typeResolver(new AnalyticsChartTypeResolver())); if (isAnalyticsEnabled) { - builder.type("Query", - typeWiring -> typeWiring.dataFetcher("getAnalyticsCharts", new GetChartsResolver(analyticsService)) - .dataFetcher("getHighlights", new GetHighlightsResolver(analyticsService))); + builder.type("Query", typeWiring -> typeWiring.dataFetcher("getAnalyticsCharts", + new GetChartsResolver(analyticsService, entityClient)) + .dataFetcher("getHighlights", new GetHighlightsResolver(analyticsService)) + .dataFetcher("getMetadataAnalyticsCharts", new GetMetadataAnalyticsResolver(entityClient))); } } @@ -595,7 +604,8 @@ private void configureMutationResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("updateDashboard", new AuthenticatedResolver<>(new MutableTypeResolver<>(dashboardType))) 
.dataFetcher("updateDataJob", new AuthenticatedResolver<>(new MutableTypeResolver<>(dataJobType))) .dataFetcher("updateDataFlow", new AuthenticatedResolver<>(new MutableTypeResolver<>(dataFlowType))) - .dataFetcher("updateCorpUserProperties", new AuthenticatedResolver<>(new MutableTypeResolver<>(corpUserType))) + .dataFetcher("updateCorpUserProperties", new MutableTypeResolver<>(corpUserType)) + .dataFetcher("updateCorpGroupProperties", new MutableTypeResolver<>(corpGroupType)) .dataFetcher("addTag", new AuthenticatedResolver<>(new AddTagResolver(entityService))) .dataFetcher("removeTag", new AuthenticatedResolver<>(new RemoveTagResolver(entityService))) .dataFetcher("addTerm", new AuthenticatedResolver<>(new AddTermResolver(entityService))) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java index db36090f7db79..63592489b4959 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java @@ -1,49 +1,64 @@ package com.linkedin.datahub.graphql.analytics.resolver; +import com.datahub.authentication.Authentication; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import com.linkedin.datahub.graphql.analytics.service.AnalyticsService; +import com.linkedin.datahub.graphql.analytics.service.AnalyticsUtil; import com.linkedin.datahub.graphql.generated.AnalyticsChart; import com.linkedin.datahub.graphql.generated.AnalyticsChartGroup; import com.linkedin.datahub.graphql.generated.BarChart; import com.linkedin.datahub.graphql.generated.DateInterval; import com.linkedin.datahub.graphql.generated.DateRange; +import 
com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.NamedBar; import com.linkedin.datahub.graphql.generated.NamedLine; import com.linkedin.datahub.graphql.generated.Row; import com.linkedin.datahub.graphql.generated.TableChart; import com.linkedin.datahub.graphql.generated.TimeSeriesChart; +import com.linkedin.datahub.graphql.resolvers.ResolverUtils; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Optional; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; import org.joda.time.DateTime; /** * Retrieves the Charts to be rendered of the Analytics screen of the DataHub application. */ +@Slf4j +@RequiredArgsConstructor public final class GetChartsResolver implements DataFetcher> { private final AnalyticsService _analyticsService; - - public GetChartsResolver(final AnalyticsService analyticsService) { - _analyticsService = analyticsService; - } + private final EntityClient _entityClient; @Override public final List get(DataFetchingEnvironment environment) throws Exception { - final AnalyticsChartGroup group = new AnalyticsChartGroup(); - group.setTitle("Product Analytics"); - group.setCharts(getProductAnalyticsCharts()); - return ImmutableList.of(group); + Authentication authentication = ResolverUtils.getAuthentication(environment); + return ImmutableList.of(AnalyticsChartGroup.builder() + .setGroupId("DataHubUsageAnalytics") + .setTitle("DataHub Usage Analytics") + .setCharts(getProductAnalyticsCharts(authentication)) + .build(), AnalyticsChartGroup.builder() + .setGroupId("GlobalMetadataAnalytics") + .setTitle("Data Landscape Summary") + .setCharts(getGlobalMetadataAnalyticsCharts(authentication)) + .build()); } /** * TODO: Config Driven Charts Instead of Hardcoded. 
*/ - private List getProductAnalyticsCharts() { + private List getProductAnalyticsCharts(Authentication authentication) throws Exception { final List charts = new ArrayList<>(); final DateTime now = DateTime.now(); final DateTime aWeekAgo = now.minusWeeks(1); @@ -59,7 +74,7 @@ private List getProductAnalyticsCharts() { DateInterval weeklyInterval = DateInterval.WEEK; final List wauTimeseries = - _analyticsService.getTimeseriesChart(AnalyticsService.DATAHUB_USAGE_EVENT_INDEX, twoMonthsDateRange, weeklyInterval, + _analyticsService.getTimeseriesChart(_analyticsService.getUsageIndexName(), twoMonthsDateRange, weeklyInterval, Optional.empty(), ImmutableMap.of(), Optional.of("browserId")); charts.add(TimeSeriesChart.builder() .setTitle(wauTitle) @@ -74,7 +89,7 @@ private List getProductAnalyticsCharts() { String searchEventType = "SearchEvent"; final List searchesTimeseries = - _analyticsService.getTimeseriesChart(AnalyticsService.DATAHUB_USAGE_EVENT_INDEX, lastWeekDateRange, dailyInterval, + _analyticsService.getTimeseriesChart(_analyticsService.getUsageIndexName(), lastWeekDateRange, dailyInterval, Optional.empty(), ImmutableMap.of("type", ImmutableList.of(searchEventType)), Optional.empty()); charts.add(TimeSeriesChart.builder() .setTitle(searchesTitle) @@ -88,24 +103,25 @@ private List getProductAnalyticsCharts() { final List columns = ImmutableList.of("Query", "Count"); final List topSearchQueries = - _analyticsService.getTopNTableChart(AnalyticsService.DATAHUB_USAGE_EVENT_INDEX, Optional.of(lastWeekDateRange), - "query.keyword", ImmutableMap.of("type", ImmutableList.of(searchEventType)), Optional.empty(), 10); + _analyticsService.getTopNTableChart(_analyticsService.getUsageIndexName(), Optional.of(lastWeekDateRange), + "query.keyword", ImmutableMap.of("type", ImmutableList.of(searchEventType)), Optional.empty(), 10, + AnalyticsUtil::buildCellWithSearchLandingPage); 
charts.add(TableChart.builder().setTitle(topSearchTitle).setColumns(columns).setRows(topSearchQueries).build()); // Chart 4: Bar Graph Chart final String sectionViewsTitle = "Section Views across Entity Types"; final List sectionViewsPerEntityType = - _analyticsService.getBarChart(AnalyticsService.DATAHUB_USAGE_EVENT_INDEX, Optional.of(lastWeekDateRange), + _analyticsService.getBarChart(_analyticsService.getUsageIndexName(), Optional.of(lastWeekDateRange), ImmutableList.of("entityType.keyword", "section.keyword"), - ImmutableMap.of("type", ImmutableList.of("EntitySectionViewEvent")), Optional.empty()); + ImmutableMap.of("type", ImmutableList.of("EntitySectionViewEvent")), Optional.empty(), true); charts.add(BarChart.builder().setTitle(sectionViewsTitle).setBars(sectionViewsPerEntityType).build()); // Chart 5: Bar Graph Chart final String actionsByTypeTitle = "Actions by Entity Type"; final List eventsByEventType = - _analyticsService.getBarChart(AnalyticsService.DATAHUB_USAGE_EVENT_INDEX, Optional.of(lastWeekDateRange), + _analyticsService.getBarChart(_analyticsService.getUsageIndexName(), Optional.of(lastWeekDateRange), ImmutableList.of("entityType.keyword", "actionType.keyword"), - ImmutableMap.of("type", ImmutableList.of("EntityActionEvent")), Optional.empty()); + ImmutableMap.of("type", ImmutableList.of("EntityActionEvent")), Optional.empty(), true); charts.add(BarChart.builder().setTitle(actionsByTypeTitle).setBars(eventsByEventType).build()); // Chart 6: Table Chart @@ -113,10 +129,59 @@ private List getProductAnalyticsCharts() { final List columns5 = ImmutableList.of("Dataset", "#Views"); final List topViewedDatasets = - _analyticsService.getTopNTableChart(AnalyticsService.DATAHUB_USAGE_EVENT_INDEX, Optional.of(lastWeekDateRange), - "dataset_name.keyword", ImmutableMap.of("type", ImmutableList.of("EntityViewEvent")), Optional.empty(), 10); + _analyticsService.getTopNTableChart(_analyticsService.getUsageIndexName(), Optional.of(lastWeekDateRange), + 
"entityUrn.keyword", ImmutableMap.of("type", ImmutableList.of("EntityViewEvent"), "entityType.keyword", + ImmutableList.of(EntityType.DATASET.name())), Optional.empty(), 10, + AnalyticsUtil::buildCellWithEntityLandingPage); + AnalyticsUtil.hydrateDisplayNameForTable(_entityClient, topViewedDatasets, Constants.DATASET_ENTITY_NAME, + ImmutableSet.of(Constants.DATASET_KEY_ASPECT_NAME), AnalyticsUtil::getDatasetName, authentication); charts.add(TableChart.builder().setTitle(topViewedTitle).setColumns(columns5).setRows(topViewedDatasets).build()); - + + return charts; + } + + private List getGlobalMetadataAnalyticsCharts(Authentication authentication) throws Exception { + final List charts = new ArrayList<>(); + // Chart 1: Entities per domain + final List entitiesPerDomain = + _analyticsService.getBarChart(_analyticsService.getAllEntityIndexName(), Optional.empty(), + ImmutableList.of("domains.keyword", "platform.keyword"), Collections.emptyMap(), Optional.empty(), false); + AnalyticsUtil.hydrateDisplayNameForBars(_entityClient, entitiesPerDomain, Constants.DOMAIN_ENTITY_NAME, + ImmutableSet.of(Constants.DOMAIN_PROPERTIES_ASPECT_NAME), AnalyticsUtil::getDomainName, authentication); + AnalyticsUtil.hydrateDisplayNameForSegments(_entityClient, entitiesPerDomain, Constants.DATA_PLATFORM_ENTITY_NAME, + ImmutableSet.of(Constants.DATA_PLATFORM_INFO_ASPECT_NAME), AnalyticsUtil::getPlatformName, authentication); + if (!entitiesPerDomain.isEmpty()) { + charts.add(BarChart.builder().setTitle("Entities per Domain").setBars(entitiesPerDomain).build()); + } + + // Chart 2: Entities per platform + final List entitiesPerPlatform = + _analyticsService.getBarChart(_analyticsService.getAllEntityIndexName(), Optional.empty(), + ImmutableList.of("platform.keyword"), Collections.emptyMap(), Optional.empty(), false); + AnalyticsUtil.hydrateDisplayNameForBars(_entityClient, entitiesPerPlatform, Constants.DATA_PLATFORM_ENTITY_NAME, + ImmutableSet.of(Constants.DATA_PLATFORM_INFO_ASPECT_NAME), 
AnalyticsUtil::getPlatformName, authentication); + if (!entitiesPerPlatform.isEmpty()) { + charts.add(BarChart.builder().setTitle("Entities per Platform").setBars(entitiesPerPlatform).build()); + } + + // Chart 3: Entities per term + final List entitiesPerTerm = + _analyticsService.getBarChart(_analyticsService.getAllEntityIndexName(), Optional.empty(), + ImmutableList.of("glossaryTerms.keyword"), Collections.emptyMap(), Optional.empty(), false); + AnalyticsUtil.hydrateDisplayNameForBars(_entityClient, entitiesPerTerm, Constants.GLOSSARY_TERM_ENTITY_NAME, + ImmutableSet.of(Constants.GLOSSARY_TERM_KEY_ASPECT_NAME), AnalyticsUtil::getTermName, authentication); + if (!entitiesPerTerm.isEmpty()) { + charts.add(BarChart.builder().setTitle("Entities per Term").setBars(entitiesPerTerm).build()); + } + + // Chart 4: Entities per fabric type + final List entitiesPerEnv = + _analyticsService.getBarChart(_analyticsService.getAllEntityIndexName(), Optional.empty(), + ImmutableList.of("origin.keyword"), Collections.emptyMap(), Optional.empty(), false); + if (entitiesPerEnv.size() > 1) { + charts.add(BarChart.builder().setTitle("Entities per Environment").setBars(entitiesPerEnv).build()); + } + return charts; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetHighlightsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetHighlightsResolver.java index fb0766aa460ea..a0b919d1d1593 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetHighlightsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetHighlightsResolver.java @@ -4,6 +4,7 @@ import com.google.common.collect.ImmutableMap; import com.linkedin.datahub.graphql.analytics.service.AnalyticsService; import com.linkedin.datahub.graphql.generated.DateRange; +import com.linkedin.datahub.graphql.generated.EntityType; import 
com.linkedin.datahub.graphql.generated.Highlight; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -11,20 +12,18 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import lombok.RequiredArgsConstructor; import org.joda.time.DateTime; /** * Retrieves the Highlights to be rendered of the Analytics screen of the DataHub application. */ +@RequiredArgsConstructor public final class GetHighlightsResolver implements DataFetcher> { private final AnalyticsService _analyticsService; - public GetHighlightsResolver(final AnalyticsService analyticsService) { - _analyticsService = analyticsService; - } - @Override public final List get(DataFetchingEnvironment environment) throws Exception { return getHighlights(); @@ -48,11 +47,11 @@ private List getHighlights() { String eventType = "SearchEvent"; int weeklyActiveUsers = - _analyticsService.getHighlights(AnalyticsService.DATAHUB_USAGE_EVENT_INDEX, Optional.of(dateRange), + _analyticsService.getHighlights(_analyticsService.getUsageIndexName(), Optional.of(dateRange), ImmutableMap.of(), ImmutableMap.of(), Optional.of("browserId")); int weeklyActiveUsersLastWeek = - _analyticsService.getHighlights(AnalyticsService.DATAHUB_USAGE_EVENT_INDEX, Optional.of(dateRangeLastWeek), + _analyticsService.getHighlights(_analyticsService.getUsageIndexName(), Optional.of(dateRangeLastWeek), ImmutableMap.of(), ImmutableMap.of(), Optional.of("browserId")); String bodyText = ""; @@ -70,40 +69,50 @@ private List getHighlights() { highlights.add(Highlight.builder().setTitle(title).setValue(weeklyActiveUsers).setBody(bodyText).build()); // Entity metdata statistics - highlights.add(getEntityMetadataStats("Datasets", AnalyticsService.DATASET_INDEX)); - highlights.add(getEntityMetadataStats("Dashboards", AnalyticsService.DASHBOARD_INDEX)); - highlights.add(getEntityMetadataStats("Charts", AnalyticsService.CHART_INDEX)); - highlights.add(getEntityMetadataStats("Pipelines", 
AnalyticsService.DATA_FLOW_INDEX)); - highlights.add(getEntityMetadataStats("Tasks", AnalyticsService.DATA_JOB_INDEX)); + getEntityMetadataStats("Datasets", EntityType.DATASET).ifPresent(highlights::add); + getEntityMetadataStats("Dashboards", EntityType.DASHBOARD).ifPresent(highlights::add); + getEntityMetadataStats("Charts", EntityType.CHART).ifPresent(highlights::add); + getEntityMetadataStats("Pipelines", EntityType.DATA_FLOW).ifPresent(highlights::add); + getEntityMetadataStats("Tasks", EntityType.DATA_JOB).ifPresent(highlights::add); + getEntityMetadataStats("Domains", EntityType.DOMAIN).ifPresent(highlights::add); return highlights; } - private Highlight getEntityMetadataStats(String title, String index) { + private Optional getEntityMetadataStats(String title, EntityType entityType) { + String index = _analyticsService.getEntityIndexName(entityType); int numEntities = getNumEntitiesFiltered(index, ImmutableMap.of()); - int numEntitiesWithOwners = - getNumEntitiesFiltered(index, ImmutableMap.of("hasOwners", ImmutableList.of("true"))); - int numEntitiesWithTags = - getNumEntitiesFiltered(index, ImmutableMap.of("hasTags", ImmutableList.of("true"))); + // If there are no entities for the type, do not show the highlight + if (numEntities == 0) { + return Optional.empty(); + } + int numEntitiesWithOwners = getNumEntitiesFiltered(index, ImmutableMap.of("hasOwners", ImmutableList.of("true"))); + int numEntitiesWithTags = getNumEntitiesFiltered(index, ImmutableMap.of("hasTags", ImmutableList.of("true"))); int numEntitiesWithDescription = - getNumEntitiesFiltered(index, ImmutableMap.of("hasDescription", ImmutableList.of("true"))); - int numEntitiesWithDomains = - getNumEntitiesFiltered(index, ImmutableMap.of("hasDomain", ImmutableList.of("true"))); + getNumEntitiesFiltered(index, ImmutableMap.of("hasDescription", ImmutableList.of("true"))); String bodyText = ""; if (numEntities > 0) { double percentWithOwners = 100.0 * numEntitiesWithOwners / numEntities; double 
percentWithTags = 100.0 * numEntitiesWithTags / numEntities; double percentWithDescription = 100.0 * numEntitiesWithDescription / numEntities; - double percentWithDomains = 100.0 * numEntitiesWithDomains / numEntities; - bodyText = String.format( - "%.2f%% have owners, %.2f%% have tags, %.2f%% have description, %.2f%% have domain assigned!", - percentWithOwners, percentWithTags, percentWithDescription, percentWithDomains); + if (entityType == EntityType.DOMAIN) { + // Don't show percent with domain when asking for stats regarding domains + bodyText = String.format("%.2f%% have owners, %.2f%% have tags, %.2f%% have description!", percentWithOwners, + percentWithTags, percentWithDescription); + } else { + int numEntitiesWithDomains = + getNumEntitiesFiltered(index, ImmutableMap.of("hasDomain", ImmutableList.of("true"))); + double percentWithDomains = 100.0 * numEntitiesWithDomains / numEntities; + bodyText = + String.format("%.2f%% have owners, %.2f%% have tags, %.2f%% have description, %.2f%% have domain assigned!", + percentWithOwners, percentWithTags, percentWithDescription, percentWithDomains); + } } - return Highlight.builder().setTitle(title).setValue(numEntities).setBody(bodyText).build(); + return Optional.of(Highlight.builder().setTitle(title).setValue(numEntities).setBody(bodyText).build()); } private int getNumEntitiesFiltered(String index, Map> filters) { return _analyticsService.getHighlights(index, Optional.empty(), filters, - ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty()); + ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty()); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetMetadataAnalyticsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetMetadataAnalyticsResolver.java new file mode 100644 index 0000000000000..ca50b13fb443c --- /dev/null +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetMetadataAnalyticsResolver.java @@ -0,0 +1,132 @@ +package com.linkedin.datahub.graphql.analytics.resolver; + +import com.datahub.authentication.Authentication; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.linkedin.datahub.graphql.analytics.service.AnalyticsUtil; +import com.linkedin.datahub.graphql.generated.AnalyticsChart; +import com.linkedin.datahub.graphql.generated.AnalyticsChartGroup; +import com.linkedin.datahub.graphql.generated.BarChart; +import com.linkedin.datahub.graphql.generated.BarSegment; +import com.linkedin.datahub.graphql.generated.MetadataAnalyticsInput; +import com.linkedin.datahub.graphql.generated.NamedBar; +import com.linkedin.datahub.graphql.resolvers.EntityTypeMapper; +import com.linkedin.datahub.graphql.resolvers.ResolverUtils; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.AggregationMetadata; +import com.linkedin.metadata.search.SearchResult; +import com.linkedin.metadata.search.utils.QueryUtils; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; +import lombok.RequiredArgsConstructor; +import org.apache.commons.lang3.StringUtils; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; + + +/** + * Retrieves the Charts to be rendered of the Analytics screen of the DataHub application. 
+ */ +@RequiredArgsConstructor +public final class GetMetadataAnalyticsResolver implements DataFetcher> { + + private final EntityClient _entityClient; + + @Override + public final List get(DataFetchingEnvironment environment) throws Exception { + final Authentication authentication = ResolverUtils.getAuthentication(environment); + final MetadataAnalyticsInput input = bindArgument(environment.getArgument("input"), MetadataAnalyticsInput.class); + final AnalyticsChartGroup group = new AnalyticsChartGroup(); + group.setGroupId("FilteredMetadataAnalytics"); + group.setTitle(""); + group.setCharts(getCharts(input, authentication)); + return ImmutableList.of(group); + } + + private List getCharts(MetadataAnalyticsInput input, Authentication authentication) throws Exception { + final List charts = new ArrayList<>(); + + List entities = Collections.emptyList(); + if (input.getEntityType() != null) { + entities = ImmutableList.of(EntityTypeMapper.getName(input.getEntityType())); + } + + String query = "*"; + if (!StringUtils.isEmpty(input.getQuery())) { + query = input.getQuery(); + } + + Filter filter = null; + if (!StringUtils.isEmpty(input.getDomain()) && !input.getDomain().equals("ALL")) { + filter = QueryUtils.newFilter("domains.keyword", input.getDomain()); + } + + SearchResult searchResult = _entityClient.searchAcrossEntities(entities, query, filter, 0, 0, authentication); + + List aggregationMetadataList = searchResult.getMetadata().getAggregations(); + + Optional domainAggregation = + aggregationMetadataList.stream().filter(metadata -> metadata.getName().equals("domains")).findFirst(); + + if (StringUtils.isEmpty(input.getDomain()) && domainAggregation.isPresent()) { + List domainChart = buildBarChart(domainAggregation.get()); + AnalyticsUtil.hydrateDisplayNameForBars(_entityClient, domainChart, Constants.DOMAIN_ENTITY_NAME, + ImmutableSet.of(Constants.DOMAIN_PROPERTIES_ASPECT_NAME), AnalyticsUtil::getDomainName, authentication); + 
charts.add(BarChart.builder().setTitle("Entities by Domain").setBars(domainChart).build()); + } + + Optional platformAggregation = + aggregationMetadataList.stream().filter(metadata -> metadata.getName().equals("platform")).findFirst(); + + if (platformAggregation.isPresent()) { + List platformChart = buildBarChart(platformAggregation.get()); + AnalyticsUtil.hydrateDisplayNameForBars(_entityClient, platformChart, Constants.DATA_PLATFORM_ENTITY_NAME, + ImmutableSet.of(Constants.DATA_PLATFORM_INFO_ASPECT_NAME), AnalyticsUtil::getPlatformName, authentication); + charts.add(BarChart.builder().setTitle("Entities by Platform").setBars(platformChart).build()); + } + + Optional termAggregation = + aggregationMetadataList.stream().filter(metadata -> metadata.getName().equals("glossaryTerms")).findFirst(); + + if (termAggregation.isPresent()) { + List termChart = buildBarChart(termAggregation.get()); + AnalyticsUtil.hydrateDisplayNameForBars(_entityClient, termChart, Constants.GLOSSARY_TERM_ENTITY_NAME, + ImmutableSet.of(Constants.GLOSSARY_TERM_KEY_ASPECT_NAME), AnalyticsUtil::getTermName, authentication); + charts.add(BarChart.builder().setTitle("Entities by Term").setBars(termChart).build()); + } + + Optional envAggregation = + aggregationMetadataList.stream().filter(metadata -> metadata.getName().equals("origin")).findFirst(); + + if (envAggregation.isPresent()) { + List termChart = buildBarChart(envAggregation.get()); + if (termChart.size() > 1) { + charts.add(BarChart.builder().setTitle("Entities by Environment").setBars(termChart).build()); + } + } + + return charts; + } + + private List buildBarChart(AggregationMetadata aggregation) { + return aggregation.getAggregations() + .entrySet() + .stream() + .sorted(Collections.reverseOrder(Map.Entry.comparingByValue())) + .limit(10) + .map(entry -> NamedBar.builder() + .setName(entry.getKey()) + .setSegments(ImmutableList.of( + BarSegment.builder().setLabel("#Entities").setValue(entry.getValue().intValue()).build())) + 
.build()) + .collect(Collectors.toList()); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java index c17f7b537b8a3..17092234a8e91 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java @@ -3,16 +3,24 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.linkedin.datahub.graphql.generated.BarSegment; +import com.linkedin.datahub.graphql.generated.Cell; import com.linkedin.datahub.graphql.generated.DateInterval; import com.linkedin.datahub.graphql.generated.DateRange; +import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.NamedBar; import com.linkedin.datahub.graphql.generated.NamedLine; import com.linkedin.datahub.graphql.generated.NumericDataPoint; import com.linkedin.datahub.graphql.generated.Row; +import com.linkedin.datahub.graphql.resolvers.EntityTypeMapper; +import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.function.Function; import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.RequestOptions; @@ -32,48 +40,46 @@ import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.Cardinality; import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; 
+@Slf4j +@RequiredArgsConstructor public class AnalyticsService { - private final Logger _logger = LoggerFactory.getLogger(AnalyticsService.class.getName()); - private final RestHighLevelClient _elasticClient; - private final Optional _indexPrefix; + private final IndexConvention _indexConvention; private static final String FILTERED = "filtered"; private static final String DATE_HISTOGRAM = "date_histogram"; private static final String UNIQUE = "unique"; private static final String DIMENSION = "dimension"; private static final String SECOND_DIMENSION = "second_dimension"; - private static final String NA = "N/A"; + public static final String NA = "N/A"; public static final String DATAHUB_USAGE_EVENT_INDEX = "datahub_usage_event"; - public static final String CHART_INDEX = "chartindex_v2"; - public static final String DASHBOARD_INDEX = "dashboardindex_v2"; - public static final String DATA_FLOW_INDEX = "dataflowindex_v2"; - public static final String DATA_JOB_INDEX = "datajobindex_v2"; - public static final String DATASET_INDEX = "datasetindex_v2"; - - public AnalyticsService(final RestHighLevelClient elasticClient, final Optional indexPrefix) { - _elasticClient = elasticClient; - _indexPrefix = indexPrefix; + + @Nonnull + public String getEntityIndexName(EntityType entityType) { + return _indexConvention.getEntityIndexName(EntityTypeMapper.getName(entityType)); } - private String getIndexName(String baseIndexName) { - return _indexPrefix.map(p -> p + "_").orElse("") + baseIndexName; + @Nonnull + public String getAllEntityIndexName() { + return _indexConvention.getEntityIndexName("*"); + } + + @Nonnull + public String getUsageIndexName() { + return _indexConvention.getIndexName(DATAHUB_USAGE_EVENT_INDEX); } public List getTimeseriesChart(String indexName, DateRange dateRange, DateInterval granularity, Optional dimension, // Length 1 for now Map> filters, Optional uniqueOn) { - String finalIndexName = getIndexName(indexName); - _logger.debug( + log.debug( 
String.format("Invoked getTimeseriesChart with indexName: %s, dateRange: %s, granularity: %s, dimension: %s,", - finalIndexName, dateRange, granularity, dimension) + String.format("filters: %s, uniqueOn: %s", filters, + indexName, dateRange, granularity, dimension) + String.format("filters: %s, uniqueOn: %s", filters, uniqueOn)); AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), Optional.of(dateRange)); @@ -90,7 +96,7 @@ public List getTimeseriesChart(String indexName, DateRange dateRange, filteredAgg.subAggregation(dateHistogram); } - SearchRequest searchRequest = constructSearchRequest(finalIndexName, filteredAgg); + SearchRequest searchRequest = constructSearchRequest(indexName, filteredAgg); Aggregations aggregationResult = executeAndExtract(searchRequest).getAggregations(); try { if (dimension.isPresent()) { @@ -104,7 +110,7 @@ public List getTimeseriesChart(String indexName, DateRange dateRange, new NamedLine("total", extractPointsFromAggregations(aggregationResult, uniqueOn.isPresent()))); } } catch (Exception e) { - _logger.error(String.format("Caught exception while getting time series chart: %s", e.getMessage())); + log.error(String.format("Caught exception while getting time series chart: %s", e.getMessage())); return ImmutableList.of(); } } @@ -122,19 +128,25 @@ private List extractPointsFromAggregations(Aggregations aggreg public List getBarChart(String indexName, Optional dateRange, List dimensions, // Length 1 or 2 - Map> filters, Optional uniqueOn) { - String finalIndexName = getIndexName(indexName); - _logger.debug( - String.format("Invoked getBarChart with indexName: %s, dateRange: %s, dimensions: %s,", finalIndexName, - dateRange, dimensions) + String.format("filters: %s, uniqueOn: %s", filters, uniqueOn)); + Map> filters, Optional uniqueOn, boolean showMissing) { + log.debug( + String.format("Invoked getBarChart with indexName: %s, dateRange: %s, dimensions: %s,", indexName, dateRange, + dimensions) + 
String.format("filters: %s, uniqueOn: %s", filters, uniqueOn)); assert (dimensions.size() == 1 || dimensions.size() == 2); AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), dateRange); - AggregationBuilder termAgg = AggregationBuilders.terms(DIMENSION).field(dimensions.get(0)).missing(NA); + TermsAggregationBuilder termAgg = AggregationBuilders.terms(DIMENSION).field(dimensions.get(0)); + if (showMissing) { + termAgg.missing(NA); + } + if (dimensions.size() == 2) { - AggregationBuilder secondTermAgg = - AggregationBuilders.terms(SECOND_DIMENSION).field(dimensions.get(1)).missing(NA); + TermsAggregationBuilder secondTermAgg = + AggregationBuilders.terms(SECOND_DIMENSION).field(dimensions.get(1)); + if (showMissing) { + secondTermAgg.missing(NA); + } uniqueOn.ifPresent(s -> secondTermAgg.subAggregation(getUniqueQuery(s))); termAgg.subAggregation(secondTermAgg); } else { @@ -142,7 +154,7 @@ public List getBarChart(String indexName, Optional dateRang } filteredAgg.subAggregation(termAgg); - SearchRequest searchRequest = constructSearchRequest(finalIndexName, filteredAgg); + SearchRequest searchRequest = constructSearchRequest(indexName, filteredAgg); Aggregations aggregationResult = executeAndExtract(searchRequest).getAggregations(); try { @@ -150,7 +162,8 @@ public List getBarChart(String indexName, Optional dateRang List barSegments = extractBarSegmentsFromAggregations(aggregationResult, DIMENSION, uniqueOn.isPresent()); return barSegments.stream() - .map(segment -> new NamedBar(segment.getLabel(), ImmutableList.of(segment))) + .map(segment -> new NamedBar(segment.getLabel(), + ImmutableList.of(BarSegment.builder().setLabel("Count").setValue(segment.getValue()).build()))) .collect(Collectors.toList()); } else { return aggregationResult.get(DIMENSION).getBuckets() @@ -160,7 +173,7 @@ public List getBarChart(String indexName, Optional dateRang .collect(Collectors.toList()); } } catch (Exception e) { - 
_logger.error(String.format("Caught exception while getting bar chart: %s", e.getMessage())); + log.error(String.format("Caught exception while getting bar chart: %s", e.getMessage())); return ImmutableList.of(); } } @@ -173,12 +186,19 @@ private List extractBarSegmentsFromAggregations(Aggregations aggrega .collect(Collectors.toList()); } + public Row buildRow(String groupByValue, Function groupByValueToCell, int count) { + List values = ImmutableList.of(groupByValue, String.valueOf(count)); + List cells = ImmutableList.of(groupByValueToCell.apply(groupByValue), + Cell.builder().setValue(String.valueOf(count)).build()); + return new Row(values, cells); + } + public List getTopNTableChart(String indexName, Optional dateRange, String groupBy, - Map> filters, Optional uniqueOn, int maxRows) { - String finalIndexName = getIndexName(indexName); - _logger.debug( - String.format("Invoked getTopNTableChart with indexName: %s, dateRange: %s, groupBy: %s", finalIndexName, - dateRange, groupBy) + String.format("filters: %s, uniqueOn: %s", filters, uniqueOn)); + Map> filters, Optional uniqueOn, int maxRows, + Function groupByValueToCell) { + log.debug( + String.format("Invoked getTopNTableChart with indexName: %s, dateRange: %s, groupBy: %s", indexName, dateRange, + groupBy) + String.format("filters: %s, uniqueOn: %s", filters, uniqueOn)); AggregationBuilder filteredAgg = getFilteredAggregation(filters, ImmutableMap.of(), dateRange); @@ -189,31 +209,31 @@ public List getTopNTableChart(String indexName, Optional dateRan } filteredAgg.subAggregation(termAgg); - SearchRequest searchRequest = constructSearchRequest(finalIndexName, filteredAgg); + SearchRequest searchRequest = constructSearchRequest(indexName, filteredAgg); Aggregations aggregationResult = executeAndExtract(searchRequest).getAggregations(); try { return aggregationResult.get(DIMENSION).getBuckets() .stream() - .map(bucket -> new Row( - ImmutableList.of(bucket.getKeyAsString(), String.valueOf(extractCount(bucket, 
uniqueOn.isPresent()))))) + .map(bucket -> buildRow(bucket.getKeyAsString(), groupByValueToCell, + extractCount(bucket, uniqueOn.isPresent()))) .collect(Collectors.toList()); } catch (Exception e) { - _logger.error(String.format("Caught exception while getting top n chart: %s", e.getMessage())); + log.error(String.format("Caught exception while getting top n chart: %s", e.getMessage())); return ImmutableList.of(); } } public int getHighlights(String indexName, Optional dateRange, Map> filters, Map> mustNotFilters, Optional uniqueOn) { - String finalIndexName = getIndexName(indexName); - _logger.debug(String.format("Invoked getHighlights with indexName: %s, dateRange: %s", finalIndexName, dateRange) - + String.format("filters: %s, uniqueOn: %s", filters, uniqueOn)); + log.debug( + String.format("Invoked getHighlights with indexName: %s, dateRange: %s", indexName, dateRange) + String.format( + "filters: %s, uniqueOn: %s", filters, uniqueOn)); AggregationBuilder filteredAgg = getFilteredAggregation(filters, mustNotFilters, dateRange); uniqueOn.ifPresent(s -> filteredAgg.subAggregation(getUniqueQuery(s))); - SearchRequest searchRequest = constructSearchRequest(finalIndexName, filteredAgg); + SearchRequest searchRequest = constructSearchRequest(indexName, filteredAgg); Filter aggregationResult = executeAndExtract(searchRequest); try { if (uniqueOn.isPresent()) { @@ -222,7 +242,7 @@ public int getHighlights(String indexName, Optional dateRange, Mapget(FILTERED); } catch (Exception e) { - _logger.error(String.format("Search query failed: %s", e.getMessage())); + log.error(String.format("Search query failed: %s", e.getMessage())); throw new RuntimeException("Search query failed:", e); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsUtil.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsUtil.java new file mode 100644 index 0000000000000..64027826f19bf --- /dev/null 
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsUtil.java @@ -0,0 +1,150 @@ +package com.linkedin.datahub.graphql.analytics.service; + +import com.datahub.authentication.Authentication; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.generated.BarSegment; +import com.linkedin.datahub.graphql.generated.Cell; +import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.datahub.graphql.generated.EntityProfileParams; +import com.linkedin.datahub.graphql.generated.LinkParams; +import com.linkedin.datahub.graphql.generated.NamedBar; +import com.linkedin.datahub.graphql.generated.Row; +import com.linkedin.datahub.graphql.generated.SearchParams; +import com.linkedin.datahub.graphql.types.common.mappers.UrnToEntityMapper; +import com.linkedin.dataplatform.DataPlatformInfo; +import com.linkedin.domain.DomainProperties; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.key.DatasetKey; +import com.linkedin.metadata.key.GlossaryTermKey; +import java.net.URISyntaxException; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.tuple.Pair; + + +@Slf4j +public class AnalyticsUtil { + private AnalyticsUtil() { + } + + public static Cell buildCellWithSearchLandingPage(String query) { + Cell result = new Cell(); + result.setValue(query); + result.setLinkParams(LinkParams.builder().setSearchParams(SearchParams.builder().setQuery(query).build()).build()); + return result; + } + + public static Cell buildCellWithEntityLandingPage(String urn) { + Cell result = new Cell(); + result.setValue(urn); + try { + Entity entity = 
UrnToEntityMapper.map(Urn.createFromString(urn)); + result.setEntity(entity); + result.setLinkParams(LinkParams.builder() + .setEntityProfileParams(EntityProfileParams.builder().setUrn(urn).setType(entity.getType()).build()) + .build()); + } catch (URISyntaxException e) { + log.error("Malformed urn {} in table", urn, e); + } + return result; + } + + public static void hydrateDisplayNameForBars(EntityClient entityClient, List bars, String entityName, + Set aspectNames, Function> extractDisplayName, + Authentication authentication) throws Exception { + Map urnToDisplayName = + getUrnToDisplayName(entityClient, bars.stream().map(NamedBar::getName).collect(Collectors.toList()), entityName, + aspectNames, extractDisplayName, authentication); + // For each urn, try to find it's name, use the urn if not found + bars.forEach(namedBar -> namedBar.setName(urnToDisplayName.getOrDefault(namedBar.getName(), namedBar.getName()))); + } + + public static void hydrateDisplayNameForSegments(EntityClient entityClient, List bars, String entityName, + Set aspectNames, Function> extractDisplayName, + Authentication authentication) throws Exception { + Map urnToDisplayName = getUrnToDisplayName(entityClient, + bars.stream().flatMap(bar -> bar.getSegments().stream().map(BarSegment::getLabel)).collect(Collectors.toList()), + entityName, aspectNames, extractDisplayName, authentication); + // For each urn, try to find it's name, use the urn if not found + bars.forEach(namedBar -> namedBar.getSegments() + .forEach(segment -> segment.setLabel(urnToDisplayName.getOrDefault(segment.getLabel(), segment.getLabel())))); + } + + public static void hydrateDisplayNameForTable(EntityClient entityClient, List rows, String entityName, + Set aspectNames, Function> extractDisplayName, + Authentication authentication) throws Exception { + Map urnToDisplayName = getUrnToDisplayName(entityClient, rows.stream() + .flatMap(row -> row.getCells().stream().filter(cell -> cell.getEntity() != 
null).map(Cell::getValue)) + .collect(Collectors.toList()), entityName, aspectNames, extractDisplayName, authentication); + // For each urn, try to find it's name, use the urn if not found + rows.forEach(row -> row.getCells().forEach(cell -> { + if (cell.getEntity() != null) { + cell.setValue(urnToDisplayName.getOrDefault(cell.getValue(), cell.getValue())); + } + })); + } + + public static Map getUrnToDisplayName(EntityClient entityClient, List urns, String entityName, + Set aspectNames, Function> extractDisplayName, + Authentication authentication) throws Exception { + Set uniqueUrns = urns.stream().distinct().map(urnStr -> { + try { + return Urn.createFromString(urnStr); + } catch (URISyntaxException e) { + return null; + } + }).filter(Objects::nonNull).collect(Collectors.toSet()); + Map aspects = entityClient.batchGetV2(entityName, uniqueUrns, aspectNames, authentication); + return aspects.entrySet() + .stream() + .map(entry -> Pair.of(entry.getKey().toString(), extractDisplayName.apply(entry.getValue()))) + .filter(pair -> pair.getValue().isPresent()) + .collect(Collectors.toMap(Pair::getKey, pair -> pair.getValue().get())); + } + + public static Optional getDomainName(EntityResponse entityResponse) { + EnvelopedAspect domainProperties = entityResponse.getAspects().get(Constants.DOMAIN_PROPERTIES_ASPECT_NAME); + if (domainProperties == null) { + return Optional.empty(); + } + return Optional.of(new DomainProperties(domainProperties.getValue().data()).getName()); + } + + public static Optional getPlatformName(EntityResponse entityResponse) { + EnvelopedAspect envelopedDataPlatformInfo = + entityResponse.getAspects().get(Constants.DATA_PLATFORM_INFO_ASPECT_NAME); + if (envelopedDataPlatformInfo == null) { + return Optional.empty(); + } + DataPlatformInfo dataPlatformInfo = new DataPlatformInfo(envelopedDataPlatformInfo.getValue().data()); + return Optional.of( + dataPlatformInfo.getDisplayName() == null ? 
dataPlatformInfo.getName() : dataPlatformInfo.getDisplayName()); + } + + public static Optional getDatasetName(EntityResponse entityResponse) { + EnvelopedAspect envelopedDatasetKey = entityResponse.getAspects().get(Constants.DATASET_KEY_ASPECT_NAME); + if (envelopedDatasetKey == null) { + return Optional.empty(); + } + DatasetKey datasetKey = new DatasetKey(envelopedDatasetKey.getValue().data()); + return Optional.of(datasetKey.getName()); + } + + public static Optional getTermName(EntityResponse entityResponse) { + EnvelopedAspect envelopedDatasetKey = entityResponse.getAspects().get(Constants.GLOSSARY_TERM_KEY_ASPECT_NAME); + if (envelopedDatasetKey == null) { + return Optional.empty(); + } + GlossaryTermKey glossaryTermKey = new GlossaryTermKey(envelopedDatasetKey.getValue().data()); + return Optional.of(glossaryTermKey.getName()); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/AddGroupMembersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/AddGroupMembersResolver.java index f9d27c58a09bb..ef198b214ffcb 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/AddGroupMembersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/AddGroupMembersResolver.java @@ -1,9 +1,12 @@ package com.linkedin.datahub.graphql.resolvers.group; +import com.google.common.collect.ImmutableList; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.authorization.ConjunctivePrivilegeGroup; +import com.linkedin.datahub.graphql.authorization.DisjunctivePrivilegeGroup; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import 
com.linkedin.datahub.graphql.exception.DataHubGraphQLException; @@ -12,6 +15,7 @@ import com.linkedin.entity.client.EntityClient; import com.linkedin.events.metadata.ChangeType; import com.linkedin.identity.GroupMembership; +import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.utils.GenericAspectUtils; import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetcher; @@ -20,6 +24,7 @@ import java.util.List; import java.util.concurrent.CompletableFuture; +import static com.linkedin.datahub.graphql.resolvers.AuthUtils.*; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; import static com.linkedin.metadata.Constants.*; @@ -38,10 +43,10 @@ public AddGroupMembersResolver(final EntityClient entityClient) { @Override public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { + final AddGroupMembersInput input = bindArgument(environment.getArgument("input"), AddGroupMembersInput.class); final QueryContext context = environment.getContext(); - if (AuthorizationUtils.canManageUsersAndGroups(context)) { - final AddGroupMembersInput input = bindArgument(environment.getArgument("input"), AddGroupMembersInput.class); + if (isAuthorized(input, context)) { final String groupUrnStr = input.getGroupUrn(); final List userUrnStrs = input.getUserUrns(); @@ -60,6 +65,20 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throw new AuthorizationException("Unauthorized to perform this action. 
Please contact your DataHub administrator."); } + private boolean isAuthorized(AddGroupMembersInput input, QueryContext context) { + final DisjunctivePrivilegeGroup orPrivilegeGroups = new DisjunctivePrivilegeGroup(ImmutableList.of( + ALL_PRIVILEGES_GROUP, + new ConjunctivePrivilegeGroup(ImmutableList.of(PoliciesConfig.EDIT_GROUP_MEMBERS_PRIVILEGE.getType())) + )); + + return AuthorizationUtils.isAuthorized( + context.getAuthorizer(), + context.getActorUrn(), + CORP_GROUP_ENTITY_NAME, + input.getGroupUrn(), + orPrivilegeGroups); + } + private void addUserToGroup(final String userUrnStr, final String groupUrnStr, final QueryContext context) { try { // First, fetch user's group membership aspect. diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupMembersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupMembersResolver.java index 3f60c623e38ab..d6064c47d0fb3 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupMembersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupMembersResolver.java @@ -1,15 +1,19 @@ package com.linkedin.datahub.graphql.resolvers.group; +import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.authorization.ConjunctivePrivilegeGroup; +import com.linkedin.datahub.graphql.authorization.DisjunctivePrivilegeGroup; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.RemoveGroupMembersInput; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.client.EntityClient; import com.linkedin.events.metadata.ChangeType; import 
com.linkedin.identity.GroupMembership; +import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.utils.GenericAspectUtils; import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetcher; @@ -20,6 +24,7 @@ import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; +import static com.linkedin.datahub.graphql.resolvers.AuthUtils.*; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; import static com.linkedin.metadata.Constants.*; @@ -35,10 +40,10 @@ public RemoveGroupMembersResolver(final EntityClient entityClient) { @Override public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { + final RemoveGroupMembersInput input = bindArgument(environment.getArgument("input"), RemoveGroupMembersInput.class); final QueryContext context = environment.getContext(); - if (AuthorizationUtils.canManageUsersAndGroups(context)) { - final RemoveGroupMembersInput input = bindArgument(environment.getArgument("input"), RemoveGroupMembersInput.class); + if (isAuthorized(input, context)) { final Urn groupUrn = Urn.createFromString(input.getGroupUrn()); final Set userUrns = input.getUserUrns().stream().map(UrnUtils::getUrn).collect(Collectors.toSet()); final Map entityResponseMap = _entityClient.batchGetV2(CORP_USER_ENTITY_NAME, @@ -72,4 +77,18 @@ public CompletableFuture get(final DataFetchingEnvironment environment) } throw new AuthorizationException("Unauthorized to perform this action. 
Please contact your DataHub administrator."); } + + private boolean isAuthorized(RemoveGroupMembersInput input, QueryContext context) { + final DisjunctivePrivilegeGroup orPrivilegeGroups = new DisjunctivePrivilegeGroup(ImmutableList.of( + ALL_PRIVILEGES_GROUP, + new ConjunctivePrivilegeGroup(ImmutableList.of(PoliciesConfig.EDIT_GROUP_MEMBERS_PRIVILEGE.getType())) + )); + + return AuthorizationUtils.isAuthorized( + context.getAuthorizer(), + context.getActorUrn(), + CORP_GROUP_ENTITY_NAME, + input.getGroupUrn(), + orPrivilegeGroups); + } } \ No newline at end of file diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupResolver.java index fad0c788c38f0..5cbee1defc038 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/RemoveGroupResolver.java @@ -10,7 +10,7 @@ import java.util.concurrent.CompletableFuture; /** - * Resolver responsible for hard deleting a particular DataHub Corp User + * Resolver responsible for hard deleting a particular DataHub Corp Group */ public class RemoveGroupResolver implements DataFetcher> { @@ -28,6 +28,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final Urn urn = Urn.createFromString(groupUrn); return CompletableFuture.supplyAsync(() -> { try { + // TODO: Remove all dangling references to this group. 
_entityClient.deleteEntity(urn, context.getAuthentication()); return true; } catch (Exception e) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/DescriptionUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/DescriptionUtils.java index 704b1bd2c13f4..2a0dbd2e51bc3 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/DescriptionUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/DescriptionUtils.java @@ -11,6 +11,7 @@ import com.linkedin.datahub.graphql.generated.SubResourceType; import com.linkedin.domain.DomainProperties; import com.linkedin.glossary.GlossaryTermInfo; +import com.linkedin.identity.CorpGroupEditableInfo; import com.linkedin.metadata.Constants; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; @@ -97,6 +98,19 @@ public static void updateTagDescription( persistAspect(resourceUrn, Constants.TAG_PROPERTIES_ASPECT_NAME, tagProperties, actor, entityService); } + public static void updateCorpGroupDescription( + String newDescription, + Urn resourceUrn, + Urn actor, + EntityService entityService + ) { + CorpGroupEditableInfo corpGroupEditableInfo = + (CorpGroupEditableInfo) getAspectFromEntity( + resourceUrn.toString(), Constants.CORP_GROUP_EDITABLE_INFO_ASPECT_NAME, entityService, new CorpGroupEditableInfo()); + corpGroupEditableInfo.setDescription(newDescription); + persistAspect(resourceUrn, Constants.CORP_GROUP_EDITABLE_INFO_ASPECT_NAME, corpGroupEditableInfo, actor, entityService); + } + public static void updateGlossaryTermDescription( String newDescription, Urn resourceUrn, @@ -161,6 +175,16 @@ public static Boolean validateLabelInput( return true; } + public static Boolean validateCorpGroupInput( + Urn corpUserUrn, + EntityService entityService + ) { + if (!entityService.exists(corpUserUrn)) { + throw new 
IllegalArgumentException(String.format("Failed to update %s. %s does not exist.", corpUserUrn, corpUserUrn)); + } + return true; + } + public static boolean isAuthorizedToUpdateFieldDescription(@Nonnull QueryContext context, Urn targetUrn) { final DisjunctivePrivilegeGroup orPrivilegeGroups = new DisjunctivePrivilegeGroup(ImmutableList.of( ALL_PRIVILEGES_GROUP, diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateDescriptionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateDescriptionResolver.java index 966a9bde73aa6..77b586bced602 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateDescriptionResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateDescriptionResolver.java @@ -15,7 +15,6 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; - @Slf4j @RequiredArgsConstructor public class UpdateDescriptionResolver implements DataFetcher> { @@ -37,6 +36,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw return updateGlossaryTermDescription(targetUrn, input, environment.getContext()); case Constants.TAG_ENTITY_NAME: return updateTagDescription(targetUrn, input, environment.getContext()); + case Constants.CORP_GROUP_ENTITY_NAME: + return updateCorpGroupDescription(targetUrn, input, environment.getContext()); default: throw new RuntimeException( String.format("Failed to update description. 
Unsupported resource type %s provided.", targetUrn)); @@ -167,4 +168,28 @@ private CompletableFuture updateGlossaryTermDescription(Urn targetUrn, } }); } + + private CompletableFuture updateCorpGroupDescription(Urn targetUrn, DescriptionUpdateInput input, QueryContext context) { + return CompletableFuture.supplyAsync(() -> { + + if (!DescriptionUtils.isAuthorizedToUpdateDescription(context, targetUrn)) { + throw new AuthorizationException( + "Unauthorized to perform this action. Please contact your DataHub administrator."); + } + DescriptionUtils.validateCorpGroupInput(targetUrn, _entityService); + + try { + Urn actor = CorpuserUrn.createFromString(context.getActorUrn()); + DescriptionUtils.updateCorpGroupDescription( + input.getDescription(), + targetUrn, + actor, + _entityService); + return true; + } catch (Exception e) { + log.error("Failed to perform update against input {}, {}", input.toString(), e.getMessage()); + throw new RuntimeException(String.format("Failed to perform update against input %s", input.toString()), e); + } + }); + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java index 2c00eee5f9106..fc0f1732b5632 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java @@ -38,7 +38,8 @@ @RequiredArgsConstructor public class ListRecommendationsResolver implements DataFetcher> { - private static final ListRecommendationsResult EMPTY_RECOMMENDATIONS = new ListRecommendationsResult(Collections.emptyList()); + private static final ListRecommendationsResult EMPTY_RECOMMENDATIONS = + new ListRecommendationsResult(Collections.emptyList()); private final 
RecommendationsService _recommendationsService; @@ -154,8 +155,11 @@ private RecommendationParams mapRecommendationParams( } if (params.hasEntityProfileParams()) { - mappedParams.setEntityProfileParams( - EntityProfileParams.builder().setUrn(params.getEntityProfileParams().getUrn().toString()).build()); + Urn profileUrn = params.getEntityProfileParams().getUrn(); + mappedParams.setEntityProfileParams(EntityProfileParams.builder() + .setUrn(profileUrn.toString()) + .setType(EntityTypeMapper.getType(profileUrn.getEntityType())) + .build()); } if (params.hasContentParams()) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/UpdateUserStatusResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/UpdateUserStatusResolver.java similarity index 97% rename from datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/UpdateUserStatusResolver.java rename to datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/UpdateUserStatusResolver.java index 498d3685e508c..e03c4ff968d4d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/group/UpdateUserStatusResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/user/UpdateUserStatusResolver.java @@ -1,4 +1,4 @@ -package com.linkedin.datahub.graphql.resolvers.group; +package com.linkedin.datahub.graphql.resolvers.user; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/CorpGroupType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/CorpGroupType.java index 9701cfb6b7832..813100db5554b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/CorpGroupType.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/CorpGroupType.java @@ -1,21 +1,35 @@ package com.linkedin.datahub.graphql.types.corpgroup; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.RecordTemplate; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.authorization.ConjunctivePrivilegeGroup; +import com.linkedin.datahub.graphql.authorization.DisjunctivePrivilegeGroup; +import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AutoCompleteResults; import com.linkedin.datahub.graphql.generated.CorpGroup; +import com.linkedin.datahub.graphql.generated.CorpGroupUpdateInput; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.FacetFilterInput; import com.linkedin.datahub.graphql.generated.SearchResults; +import com.linkedin.datahub.graphql.types.MutableType; import com.linkedin.datahub.graphql.types.SearchableEntityType; import com.linkedin.datahub.graphql.types.corpgroup.mappers.CorpGroupMapper; import com.linkedin.datahub.graphql.types.mappers.AutoCompleteResultsMapper; import com.linkedin.datahub.graphql.types.mappers.UrnSearchResultsMapper; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.client.EntityClient; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.identity.CorpGroupEditableInfo; +import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.query.AutoCompleteResult; import com.linkedin.metadata.search.SearchResult; +import com.linkedin.metadata.utils.GenericAspectUtils; +import com.linkedin.mxe.MetadataChangeProposal; import graphql.execution.DataFetcherResult; import 
java.util.ArrayList; import java.util.Collections; @@ -28,8 +42,7 @@ import static com.linkedin.metadata.Constants.*; - -public class CorpGroupType implements SearchableEntityType { +public class CorpGroupType implements SearchableEntityType, MutableType { private final EntityClient _entityClient; @@ -42,6 +55,10 @@ public Class objectClass() { return CorpGroup.class; } + public Class inputClass() { + return CorpGroupUpdateInput.class; + } + @Override public EntityType type() { return EntityType.CORP_GROUP; @@ -93,4 +110,84 @@ public AutoCompleteResults autoComplete(@Nonnull String query, context.getAuthentication()); return AutoCompleteResultsMapper.map(result); } + + @Override + public CorpGroup update(@Nonnull String urn, @Nonnull CorpGroupUpdateInput input, @Nonnull QueryContext context) throws Exception { + if (isAuthorizedToUpdate(urn, input, context)) { + // Get existing editable info to merge with + Urn groupUrn = Urn.createFromString(urn); + Map gmsResponse = + _entityClient.batchGetV2(CORP_GROUP_ENTITY_NAME, ImmutableSet.of(groupUrn), ImmutableSet.of( + CORP_GROUP_EDITABLE_INFO_ASPECT_NAME), + context.getAuthentication()); + + CorpGroupEditableInfo existingCorpGroupEditableInfo = null; + if (gmsResponse.containsKey(groupUrn) && gmsResponse.get(groupUrn).getAspects().containsKey(CORP_GROUP_EDITABLE_INFO_ASPECT_NAME)) { + existingCorpGroupEditableInfo = new CorpGroupEditableInfo(gmsResponse.get(groupUrn).getAspects() + .get(CORP_GROUP_EDITABLE_INFO_ASPECT_NAME).getValue().data()); + } + + // Create the MCP + final MetadataChangeProposal proposal = new MetadataChangeProposal(); + proposal.setEntityUrn(Urn.createFromString(urn)); + proposal.setEntityType(CORP_GROUP_ENTITY_NAME); + proposal.setAspectName(CORP_GROUP_EDITABLE_INFO_ASPECT_NAME); + proposal.setAspect( + GenericAspectUtils.serializeAspect(mapCorpGroupEditableInfo(input, existingCorpGroupEditableInfo))); + proposal.setChangeType(ChangeType.UPSERT); + _entityClient.ingestProposal(proposal, 
context.getAuthentication()); + + return load(urn, context).getData(); + } + throw new AuthorizationException("Unauthorized to perform this action. Please contact your DataHub administrator."); + } + + private boolean isAuthorizedToUpdate(String urn, CorpGroupUpdateInput input, QueryContext context) { + // Decide whether the current principal should be allowed to update the Dataset. + final DisjunctivePrivilegeGroup orPrivilegeGroups = getAuthorizedPrivileges(input); + return AuthorizationUtils.isAuthorized( + context.getAuthorizer(), + context.getAuthentication().getActor().toUrnStr(), + PoliciesConfig.CORP_GROUP_PRIVILEGES.getResourceType(), + urn, + orPrivilegeGroups); + } + + private DisjunctivePrivilegeGroup getAuthorizedPrivileges(final CorpGroupUpdateInput updateInput) { + final ConjunctivePrivilegeGroup allPrivilegesGroup = new ConjunctivePrivilegeGroup(ImmutableList.of( + PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType() + )); + + List specificPrivileges = new ArrayList<>(); + if (updateInput.getDescription() != null) { + // Requires the Update Docs privilege. + specificPrivileges.add(PoliciesConfig.EDIT_ENTITY_DOCS_PRIVILEGE.getType()); + } else if (updateInput.getSlack() != null || updateInput.getEmail() != null) { + // Requires the Update Contact info privilege. + specificPrivileges.add(PoliciesConfig.EDIT_CONTACT_INFO_PRIVILEGE.getType()); + } + + final ConjunctivePrivilegeGroup specificPrivilegeGroup = new ConjunctivePrivilegeGroup(specificPrivileges); + + // If you either have all entity privileges, or have the specific privileges required, you are authorized. + return new DisjunctivePrivilegeGroup(ImmutableList.of( + allPrivilegesGroup, + specificPrivilegeGroup + )); + } + + private RecordTemplate mapCorpGroupEditableInfo(CorpGroupUpdateInput input, @Nullable CorpGroupEditableInfo existing) { + CorpGroupEditableInfo result = existing != null ? 
existing : new CorpGroupEditableInfo(); + + if (input.getDescription() != null) { + result.setDescription(input.getDescription()); + } + if (input.getSlack() != null) { + result.setSlack(input.getSlack()); + } + if (input.getEmail() != null) { + result.setEmail(input.getEmail()); + } + return result; + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/mappers/CorpGroupEditablePropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/mappers/CorpGroupEditablePropertiesMapper.java new file mode 100644 index 0000000000000..f476794bc545e --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/mappers/CorpGroupEditablePropertiesMapper.java @@ -0,0 +1,30 @@ +package com.linkedin.datahub.graphql.types.corpgroup.mappers; + +import com.linkedin.data.template.GetMode; +import com.linkedin.datahub.graphql.generated.CorpGroupEditableProperties; +import com.linkedin.datahub.graphql.types.mappers.ModelMapper; + +import javax.annotation.Nonnull; + +/** + * Maps Pegasus {@link RecordTemplate} objects to objects conforming to the GQL schema. 
+ * + * To be replaced by auto-generated mappers implementations + */ +public class CorpGroupEditablePropertiesMapper implements ModelMapper { + + public static final CorpGroupEditablePropertiesMapper INSTANCE = new CorpGroupEditablePropertiesMapper(); + + public static CorpGroupEditableProperties map(@Nonnull final com.linkedin.identity.CorpGroupEditableInfo corpGroupEditableInfo) { + return INSTANCE.apply(corpGroupEditableInfo); + } + + @Override + public CorpGroupEditableProperties apply(@Nonnull final com.linkedin.identity.CorpGroupEditableInfo corpGroupEditableInfo) { + final CorpGroupEditableProperties result = new CorpGroupEditableProperties(); + result.setDescription(corpGroupEditableInfo.getDescription(GetMode.DEFAULT)); + result.setSlack(corpGroupEditableInfo.getSlack(GetMode.DEFAULT)); + result.setEmail(corpGroupEditableInfo.getEmail(GetMode.DEFAULT)); + return result; + } +} \ No newline at end of file diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/mappers/CorpGroupMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/mappers/CorpGroupMapper.java index dbce338ecb7b5..3b353be749025 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/mappers/CorpGroupMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/mappers/CorpGroupMapper.java @@ -7,6 +7,7 @@ import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.identity.CorpGroupEditableInfo; import com.linkedin.identity.CorpGroupInfo; import com.linkedin.metadata.key.CorpGroupKey; import javax.annotation.Nonnull; @@ -36,7 +37,7 @@ public CorpGroup apply(@Nonnull final EntityResponse entityResponse) { MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); mappingHelper.mapToResult(CORP_GROUP_KEY_ASPECT_NAME, 
this::mapCorpGroupKey); mappingHelper.mapToResult(CORP_GROUP_INFO_ASPECT_NAME, this::mapCorpGroupInfo); - + mappingHelper.mapToResult(CORP_GROUP_EDITABLE_INFO_ASPECT_NAME, this::mapCorpGroupEditableInfo); return mappingHelper.getResult(); } @@ -50,4 +51,8 @@ private void mapCorpGroupInfo(@Nonnull CorpGroup corpGroup, @Nonnull DataMap dat corpGroup.setProperties(CorpGroupPropertiesMapper.map(corpGroupInfo)); corpGroup.setInfo(CorpGroupInfoMapper.map(corpGroupInfo)); } + + private void mapCorpGroupEditableInfo(@Nonnull CorpGroup corpGroup, @Nonnull DataMap dataMap) { + corpGroup.setEditableProperties(CorpGroupEditablePropertiesMapper.map(new CorpGroupEditableInfo(dataMap))); + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java index 2638a15fc25e5..7eaa67d9127af 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java @@ -1,12 +1,16 @@ package com.linkedin.datahub.graphql.types.corpuser; +import com.google.common.collect.ImmutableList; import com.linkedin.common.url.Url; -import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.RecordTemplate; import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.authorization.ConjunctivePrivilegeGroup; +import com.linkedin.datahub.graphql.authorization.DisjunctivePrivilegeGroup; +import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AutoCompleteResults; import com.linkedin.datahub.graphql.generated.CorpUser; import 
com.linkedin.datahub.graphql.generated.CorpUserUpdateInput; @@ -23,12 +27,12 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.identity.CorpUserEditableInfo; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.query.AutoCompleteResult; import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.utils.GenericAspectUtils; import com.linkedin.mxe.MetadataChangeProposal; import graphql.execution.DataFetcherResult; -import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; @@ -106,37 +110,70 @@ public AutoCompleteResults autoComplete(@Nonnull String query, return AutoCompleteResultsMapper.map(result); } - private CorpuserUrn getCorpUserUrn(final String urnStr) { - try { - return CorpuserUrn.createFromString(urnStr); - } catch (URISyntaxException e) { - throw new RuntimeException(String.format("Failed to retrieve user with urn %s, invalid urn", urnStr)); - } - } - public Class inputClass() { return CorpUserUpdateInput.class; } @Override public CorpUser update(@Nonnull String urn, @Nonnull CorpUserUpdateInput input, @Nonnull QueryContext context) throws Exception { - final CorpuserUrn actor = CorpuserUrn.createFromString(context.getAuthentication().getActor().toUrnStr()); - - // Get existing editable info to merge with - Optional existingCorpUserEditableInfo = - _entityClient.getVersionedAspect(urn, Constants.CORP_USER_EDITABLE_INFO_NAME, 0L, CorpUserEditableInfo.class, - context.getAuthentication()); - - // Create the MCP - final MetadataChangeProposal proposal = new MetadataChangeProposal(); - proposal.setEntityUrn(Urn.createFromString(urn)); - proposal.setEntityType(Constants.CORP_USER_ENTITY_NAME); - proposal.setAspectName(Constants.CORP_USER_EDITABLE_INFO_NAME); - proposal.setAspect(GenericAspectUtils.serializeAspect(mapCorpUserEditableInfo(input, existingCorpUserEditableInfo))); - 
proposal.setChangeType(ChangeType.UPSERT); - _entityClient.ingestProposal(proposal, context.getAuthentication()); - - return load(urn, context).getData(); + if (isAuthorizedToUpdate(urn, input, context)) { + // Get existing editable info to merge with + Optional existingCorpUserEditableInfo = + _entityClient.getVersionedAspect(urn, Constants.CORP_USER_EDITABLE_INFO_NAME, 0L, CorpUserEditableInfo.class, + context.getAuthentication()); + + // Create the MCP + final MetadataChangeProposal proposal = new MetadataChangeProposal(); + proposal.setEntityUrn(Urn.createFromString(urn)); + proposal.setEntityType(Constants.CORP_USER_ENTITY_NAME); + proposal.setAspectName(Constants.CORP_USER_EDITABLE_INFO_NAME); + proposal.setAspect(GenericAspectUtils.serializeAspect(mapCorpUserEditableInfo(input, existingCorpUserEditableInfo))); + proposal.setChangeType(ChangeType.UPSERT); + _entityClient.ingestProposal(proposal, context.getAuthentication()); + + return load(urn, context).getData(); + } + throw new AuthorizationException("Unauthorized to perform this action. Please contact your DataHub administrator."); + } + + private boolean isAuthorizedToUpdate(String urn, CorpUserUpdateInput input, QueryContext context) { + // Decide whether the current principal should be allowed to update the Dataset. + final DisjunctivePrivilegeGroup orPrivilegeGroups = getAuthorizedPrivileges(input); + + // Either the updating actor is the user, or the actor has privileges to update the user information. 
+ return context.getActorUrn().equals(urn) || AuthorizationUtils.isAuthorized( + context.getAuthorizer(), + context.getAuthentication().getActor().toUrnStr(), + PoliciesConfig.CORP_GROUP_PRIVILEGES.getResourceType(), + urn, + orPrivilegeGroups); + } + + private DisjunctivePrivilegeGroup getAuthorizedPrivileges(final CorpUserUpdateInput updateInput) { + final ConjunctivePrivilegeGroup allPrivilegesGroup = new ConjunctivePrivilegeGroup(ImmutableList.of( + PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType() + )); + + List specificPrivileges = new ArrayList<>(); + if (updateInput.getSlack() != null + || updateInput.getEmail() != null + || updateInput.getPhone() != null) { + specificPrivileges.add(PoliciesConfig.EDIT_CONTACT_INFO_PRIVILEGE.getType()); + } else if (updateInput.getAboutMe() != null + || updateInput.getDisplayName() != null + || updateInput.getPictureLink() != null + || updateInput.getTeams() != null + || updateInput.getTitle() != null) { + specificPrivileges.add(PoliciesConfig.EDIT_USER_PROFILE_PRIVILEGE.getType()); + } + + final ConjunctivePrivilegeGroup specificPrivilegeGroup = new ConjunctivePrivilegeGroup(specificPrivileges); + + // If you either have all entity privileges, or have the specific privileges required, you are authorized. 
+ return new DisjunctivePrivilegeGroup(ImmutableList.of( + allPrivilegesGroup, + specificPrivilegeGroup + )); } private RecordTemplate mapCorpUserEditableInfo(CorpUserUpdateInput input, Optional existing) { @@ -159,6 +196,9 @@ private RecordTemplate mapCorpUserEditableInfo(CorpUserUpdateInput input, Option if (input.getTeams() != null) { result.setTeams(new StringArray(input.getTeams())); } + if (input.getTitle() != null) { + result.setTitle(input.getTitle()); + } if (input.getPhone() != null) { result.setPhone(input.getPhone()); } @@ -168,9 +208,6 @@ private RecordTemplate mapCorpUserEditableInfo(CorpUserUpdateInput input, Option if (input.getEmail() != null) { result.setEmail(input.getEmail()); } - if (input.getTitle() != null) { - result.setTitle(input.getTitle()); - } return result; } diff --git a/datahub-graphql-core/src/main/resources/analytics.graphql b/datahub-graphql-core/src/main/resources/analytics.graphql index 8eb8aed1725d6..3a72fe103701a 100644 --- a/datahub-graphql-core/src/main/resources/analytics.graphql +++ b/datahub-graphql-core/src/main/resources/analytics.graphql @@ -16,8 +16,34 @@ extend type Query { Retrieves a set of server driven Analytics Highlight Cards to render in the UI """ getHighlights: [Highlight!]! + + """ + Retrieves a set of charts regarding the ingested metadata + """ + getMetadataAnalyticsCharts(input: MetadataAnalyticsInput!): [AnalyticsChartGroup!]! 
} +""" +Input to fetch metadata analytics charts +""" +input MetadataAnalyticsInput { + """ + Entity type to fetch analytics for (If empty, queries across all entities) + """ + entityType: EntityType + + """ + Urn of the domain to fetch analytics for (If empty or GLOBAL, queries across all domains) + """ + domain: String + + """ + Search query to filter down result (If empty, does not apply any search query) + """ + query: String +} + + """ For consumption by UI only """ @@ -27,6 +53,7 @@ union AnalyticsChart = TimeSeriesChart | BarChart | TableChart For consumption by UI only """ type AnalyticsChartGroup { + groupId: String! title: String! charts: [AnalyticsChart!]! } @@ -102,12 +129,41 @@ type BarChart { bars: [NamedBar!]! } + +""" +Parameters required to specify the page to land once clicked +""" +type LinkParams { + """ + Context to define the search page + """ + searchParams: SearchParams + + """ + Context to define the entity profile page + """ + entityProfileParams: EntityProfileParams +} + +""" +For consumption by UI only +""" +type Cell { + value: String! + entity: Entity + linkParams: LinkParams +} + + """ For consumption by UI only """ type Row { - # All values are expected to be strings. + # DEPRECATED All values are expected to be strings. values: [String!]! + + # More detailed information about each cell in the row + cells: [Cell!] 
} """ diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 9be2ac696c5e6..910ab9b7b76c9 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -277,6 +277,11 @@ type Mutation { Update a particular Corp User's editable properties """ updateCorpUserProperties(urn: String!, input: CorpUserUpdateInput!): CorpUser + + """ + Update a particular Corp Group's editable properties + """ + updateCorpGroupProperties(urn: String!, input: CorpGroupUpdateInput!): CorpGroup } """ @@ -2328,7 +2333,6 @@ type CorpUserEditableProperties { email: String } - """ Arguments provided to update a CorpUser Entity """ @@ -2403,6 +2407,11 @@ type CorpGroup implements Entity { """ properties: CorpGroupProperties + """ + Additional read write properties about the group + """ + editableProperties: CorpGroupEditableProperties + """ Edges extending from this entity """ @@ -2474,6 +2483,46 @@ type CorpGroupProperties { email: String } +""" +Additional read write properties about a group +""" +type CorpGroupEditableProperties { + """ + DataHub description of the group + """ + description: String + + """ + Slack handle for the group + """ + slack: String + + """ + Email address for the group + """ + email: String +} + +""" +Arguments provided to update a CorpGroup Entity +""" +input CorpGroupUpdateInput { + """ + DataHub description of the group + """ + description: String + + """ + Slack handle for the group + """ + slack: String + + """ + Email address for the group + """ + email: String +} + """ An owner of a Metadata Entity, either a user or group """ diff --git a/datahub-graphql-core/src/main/resources/recommendation.graphql b/datahub-graphql-core/src/main/resources/recommendation.graphql index ef2f3f2700d4e..8b522a14f63c1 100644 --- a/datahub-graphql-core/src/main/resources/recommendation.graphql +++ 
b/datahub-graphql-core/src/main/resources/recommendation.graphql @@ -248,6 +248,12 @@ type EntityProfileParams { Urn of the entity being shown """ urn: String! + + """ + Type of the enity being displayed + """ + type: EntityType! + } """ diff --git a/datahub-web-react/src/app/analyticsDashboard/components/AnalyticsPage.tsx b/datahub-web-react/src/app/analyticsDashboard/components/AnalyticsPage.tsx index 80e9ebf369da3..7f019fb8322fc 100644 --- a/datahub-web-react/src/app/analyticsDashboard/components/AnalyticsPage.tsx +++ b/datahub-web-react/src/app/analyticsDashboard/components/AnalyticsPage.tsx @@ -1,28 +1,88 @@ -import React from 'react'; +import React, { useState } from 'react'; import styled from 'styled-components'; -import { Alert } from 'antd'; +import { Alert, Divider, Input, Select } from 'antd'; +import { SearchOutlined } from '@ant-design/icons'; import { SearchablePage } from '../../search/SearchablePage'; -import { sampleCharts, sampleHighlights } from '../sampleData'; import { ChartGroup } from './ChartGroup'; -import { useGetAnalyticsChartsQuery } from '../../../graphql/analytics.generated'; +import { useGetAnalyticsChartsQuery, useGetMetadataAnalyticsChartsQuery } from '../../../graphql/analytics.generated'; import { useGetHighlightsQuery } from '../../../graphql/highlights.generated'; import { Highlight } from './Highlight'; import { Message } from '../../shared/Message'; +import { useListDomainsQuery } from '../../../graphql/domain.generated'; +import filterSearchQuery from '../../search/utils/filterSearchQuery'; +import { ANTD_GRAY } from '../../entity/shared/constants'; const HighlightGroup = styled.div` display: flex; align-items: space-between; justify-content: center; padding-top: 20px; - margin-bottom: -20px; + margin-bottom: 10px; `; -const IS_DEV = false; +const MetadataAnalyticsInput = styled.div` + display: flex; +`; + +const MetadataAnalyticsPlaceholder = styled.span` + margin: 25px; + margin-bottom: 50px; + font-size: 18px; + color: 
${ANTD_GRAY[7]}; +`; + +const DomainSelect = styled(Select)` + margin-left: 25px; + width: 200px; + display: inline-block; +`; + +const StyledSearchBar = styled(Input)` + &&& { + margin-left: 10px; + border-radius: 70px; + color: ${ANTD_GRAY[7]}; + width: 250px; + } +`; export const AnalyticsPage = () => { const { data: chartData, loading: chartLoading, error: chartError } = useGetAnalyticsChartsQuery(); const { data: highlightData, loading: highlightLoading, error: highlightError } = useGetHighlightsQuery(); + const { + loading: domainLoading, + error: domainError, + data: domainData, + } = useListDomainsQuery({ + variables: { + input: { + start: 0, + count: 1000, + }, + }, + fetchPolicy: 'no-cache', + }); + const [domain, setDomain] = useState(''); + const [stagedQuery, setStagedQuery] = useState(''); + const [query, setQuery] = useState(''); + + const onDomainChange = (inputDomain) => setDomain(inputDomain); + const onStagedQueryChange = (inputQuery) => setStagedQuery(inputQuery); + const { + loading: metadataAnalyticsLoading, + error: metadataAnalyticsError, + data: metadataAnalyticsData, + } = useGetMetadataAnalyticsChartsQuery({ + variables: { + input: { + entityType: null, + domain, + query, + }, + }, + skip: domain === '' && query === '', + }); return ( @@ -33,16 +93,91 @@ export const AnalyticsPage = () => { {highlightError && ( )} - {(IS_DEV ? 
sampleHighlights : highlightData?.getHighlights)?.map((highlight) => ( + {highlightData?.getHighlights?.map((highlight) => ( ))} + <> + {chartLoading && } + {chartError && ( + + )} + {chartData?.getAnalyticsCharts + ?.filter((chartGroup) => chartGroup.groupId === 'GlobalMetadataAnalytics') + .map((chartGroup) => ( + + ))} + + <> + {domainLoading && } + {domainError && ( + + )} + {!chartLoading && ( + <> + + + + option?.children.toLowerCase().indexOf(input.toLowerCase()) >= 0 + } + > + All + {domainData?.listDomains?.domains.map((domainChoice) => ( + + {domainChoice?.properties?.name} + + ))} + + { + e.stopPropagation(); + setQuery(filterSearchQuery(stagedQuery || '')); + }} + value={stagedQuery} + onChange={(e) => onStagedQueryChange(e.target.value)} + data-testid="analytics-search-input" + prefix={ + setQuery(filterSearchQuery(stagedQuery || ''))} /> + } + /> + + + )} + + <> + {metadataAnalyticsLoading && ( + + )} + {metadataAnalyticsError && ( + + )} + {domain === '' && query === '' + ? !chartLoading && ( + + Please specify domain or query to get granular results + + ) + : metadataAnalyticsData?.getMetadataAnalyticsCharts?.map((chartGroup) => ( + + ))} + <> {chartLoading && } {chartError && } - {(IS_DEV ? 
sampleCharts : chartData?.getAnalyticsCharts)?.map((chartGroup) => ( - - ))} + {!chartLoading && + chartData?.getAnalyticsCharts + ?.filter((chartGroup) => chartGroup.groupId === 'DataHubUsageAnalytics') + .map((chartGroup) => ( + <> + + + + ))} ); diff --git a/datahub-web-react/src/app/analyticsDashboard/components/BarChart.tsx b/datahub-web-react/src/app/analyticsDashboard/components/BarChart.tsx index 31cac1fd4ab24..f178b09afcccc 100644 --- a/datahub-web-react/src/app/analyticsDashboard/components/BarChart.tsx +++ b/datahub-web-react/src/app/analyticsDashboard/components/BarChart.tsx @@ -14,7 +14,8 @@ type Props = { height: number; }; -const MARGIN_SIZE = 32; +const WIDTH_MARGIN_SIZE = 55; +const HEIGHT_MARGIN_SIZE = 32; function transformName(label: string) { if (label === 'DATA_JOB') { @@ -27,17 +28,21 @@ function transformName(label: string) { } function transformChartData(chartData: BarChartType) { - return chartData.bars.map((bar, i) => ({ - index: i, - name: transformName(bar.name), - ...bar.segments.reduce( - (obj, segment) => ({ - ...obj, - [segment.label]: segment.value, - }), - {}, - ), - })); + return chartData.bars.map((bar, i) => { + const name = transformName(bar.name); + return { + index: i, + name, + displayName: name.length > 15 ? 
`${name.substring(0, Math.min(15, name.length))}...` : name, + ...bar.segments.reduce( + (obj, segment) => ({ + ...obj, + [segment.label]: segment.value, + }), + {}, + ), + }; + }); } export const BarChart = ({ chartData, width, height }: Props) => { @@ -65,25 +70,25 @@ export const BarChart = ({ chartData, width, height }: Props) => { }); const xAxisScale = scaleBand({ - domain: transformedChartData.map((bar) => bar.name), + domain: transformedChartData.map((bar) => bar.displayName), padding: 0.2, }); - const xMax = width - MARGIN_SIZE; - const yMax = height - MARGIN_SIZE - 80; + const xMax = width - WIDTH_MARGIN_SIZE; + const yMax = height - HEIGHT_MARGIN_SIZE - 80; xAxisScale.rangeRound([0, xMax]); yAxisScale.range([yMax, 0]); return ( <> - + - + data={transformedChartData} keys={keys} - x={(data) => data.name} + x={(data) => data.displayName} xScale={xAxisScale} yScale={yAxisScale} color={segmentScale} @@ -101,7 +106,15 @@ export const BarChart = ({ chartData, width, height }: Props) => { width={bar.width} fill={bar.color} > - {bar.bar[1] - bar.bar[0]} + + {barStacks.length === 1 + ? 
`${transformedChartData[bar.index].name}, ${ + bar.bar[1] - bar.bar[0] + }` + : `${transformedChartData[bar.index].name}, ${bar.key}, ${ + bar.bar[1] - bar.bar[0] + }`} + )), ); @@ -109,8 +122,8 @@ export const BarChart = ({ chartData, width, height }: Props) => { ({ fontSize: 11, @@ -121,8 +134,8 @@ export const BarChart = ({ chartData, width, height }: Props) => { ({ fontSize: 10, diff --git a/datahub-web-react/src/app/analyticsDashboard/components/ChartGroup.tsx b/datahub-web-react/src/app/analyticsDashboard/components/ChartGroup.tsx index 3e123473c744d..7185d92b99832 100644 --- a/datahub-web-react/src/app/analyticsDashboard/components/ChartGroup.tsx +++ b/datahub-web-react/src/app/analyticsDashboard/components/ChartGroup.tsx @@ -32,10 +32,12 @@ type Props = { export const ChartGroup = ({ chartGroup }: Props) => { return ( - - {chartGroup.title} - - + {chartGroup.title?.length > 0 && ( + + {chartGroup.title} + + + )} {chartGroup.charts.map((chart) => ( diff --git a/datahub-web-react/src/app/analyticsDashboard/components/TableChart.tsx b/datahub-web-react/src/app/analyticsDashboard/components/TableChart.tsx index a739f3826dba1..ac3ce3733c2e7 100644 --- a/datahub-web-react/src/app/analyticsDashboard/components/TableChart.tsx +++ b/datahub-web-react/src/app/analyticsDashboard/components/TableChart.tsx @@ -1,27 +1,88 @@ import React from 'react'; -import { Table } from 'antd'; +import { Button, Table } from 'antd'; import styled from 'styled-components'; +import { useHistory } from 'react-router'; -import { TableChart as TableChartType } from '../../../types.generated'; +import { Cell, EntityType, FacetFilterInput, TableChart as TableChartType } from '../../../types.generated'; +import { navigateToSearchUrl } from '../../search/utils/navigateToSearchUrl'; +import { useEntityRegistry } from '../../useEntityRegistry'; type Props = { chartData: TableChartType; }; +type TableCellProps = { + cell: Cell; +}; + const StyledTable = styled(Table)` padding-top: 16px; width: 
100%; `; +const TableLink = styled(Button)` + &&& { + padding: 0px; + font-weight: 400; + margin-top: -6px; + margin-bottom: -6px; + } +`; + +const TableCell = ({ cell }: TableCellProps) => { + const history = useHistory(); + const entityRegistry = useEntityRegistry(); + const onClickQuery = (query: string, types: Array, filters: Array) => { + navigateToSearchUrl({ + query, + type: (types && types.length > 0 && types[0]) || undefined, + filters: filters || [], + history, + }); + }; + + if (cell.linkParams?.searchParams) { + return ( + + onClickQuery( + cell.linkParams?.searchParams?.query || '', + cell.linkParams?.searchParams?.types || [], + cell.linkParams?.searchParams?.filters || [], + ) + } + > + {cell.value} + + ); + } + if (cell.linkParams?.entityProfileParams) { + return ( + + {cell.value} + + ); + } + return {cell.value}; +}; + export const TableChart = ({ chartData }: Props) => { const columns = chartData.columns.map((column) => ({ title: column, key: column, dataIndex: column, + render: (cell) => , })); - const tableData = chartData.rows.map((row) => - row.values.reduce((acc, value, i) => ({ ...acc, [chartData.columns[i]]: value }), {}), + const tableData = chartData.rows.map( + (row) => row.cells?.reduce((acc, cell, i) => ({ ...acc, [chartData.columns[i]]: cell }), {}) || {}, ); return ; }; diff --git a/datahub-web-react/src/app/analyticsDashboard/components/lineColors.ts b/datahub-web-react/src/app/analyticsDashboard/components/lineColors.ts index ab3f313393393..669a76067e66c 100644 --- a/datahub-web-react/src/app/analyticsDashboard/components/lineColors.ts +++ b/datahub-web-react/src/app/analyticsDashboard/components/lineColors.ts @@ -1,6 +1,7 @@ import { cyan, gold, lime, orange, purple, red, volcano, yellow } from '@ant-design/colors'; export const lineColors = [ + cyan[6], red[3], orange[4], yellow[5], diff --git a/datahub-web-react/src/app/analyticsDashboard/sampleData.ts b/datahub-web-react/src/app/analyticsDashboard/sampleData.ts deleted file 
mode 100644 index 449f8eae17c28..0000000000000 --- a/datahub-web-react/src/app/analyticsDashboard/sampleData.ts +++ /dev/null @@ -1,121 +0,0 @@ -import { - AnalyticsChart, - AnalyticsChartGroup, - BarSegment, - DateInterval, - NamedBar, - NamedLine, - NumericDataPoint, - TableChart, -} from '../../types.generated'; - -const dateOffset = 24 * 60 * 60 * 1000; // 1 day - -function generatePoints(length: number, start: Date, interval: number) { - const output: NumericDataPoint[] = []; - const iterateDate = new Date(start); - let iterateVal = Math.random() * 20; - for (let i = 0; i < length; i++) { - iterateVal += Math.random() * 5; - output.push({ - y: iterateVal, - x: iterateDate.toString(), - }); - iterateDate.setTime(iterateDate.getTime() + interval); - } - return output; -} - -export function generateSampleTimeSeries(title: string, lines: number): AnalyticsChart { - const allLines: NamedLine[] = []; - for (let i = 0; i < lines; i++) { - const lineTitle = Math.random().toString(36).substring(9); - allLines.push({ - name: `${lineTitle} per day`, - data: generatePoints(8, new Date(), dateOffset), - }); - } - - return { - title, - lines: allLines, - dateRange: { - start: new Date().getTime().toString(), - end: new Date(new Date().getDate() - 6).getTime().toString(), - }, - interval: DateInterval.Day, - __typename: 'TimeSeriesChart', - }; -} - -function generateBars(length: number) { - const output: BarSegment[] = []; - let iterateVal = Math.random() * 20; - for (let i = 0; i < length; i++) { - iterateVal += Math.random() * 5; - output.push({ - value: iterateVal, - label: `Segment ${i}`, - }); - } - return output; -} - -export function generateSampleBarChart(title: string, lines: number): AnalyticsChart { - const allBars: NamedBar[] = []; - for (let i = 0; i < lines; i++) { - const barTitle = Math.random().toString(36).substring(9); - allBars.push({ - name: `${barTitle} per day`, - segments: generateBars(11), - }); - } - - return { - title, - bars: allBars, - 
__typename: 'BarChart', - }; -} - -export function generateSampleTableChart(title: string): TableChart { - return { - title, - columns: ['Query', '# Searches', '% Searches'], - rows: [ - { values: ['lineage', '331', '4.34%'] }, - { values: ['presto', '22', '4.34%'] }, - { values: ['snowflake', '12', '4.34%'] }, - ], - __typename: 'TableChart', - }; -} - -export function generateSampleChartGroup(title: string, charts: number): AnalyticsChartGroup { - const constructedCharts: AnalyticsChart[] = []; - for (let i = 0; i < charts; i++) { - const chartTitle = Math.random().toString(36).substring(9); - constructedCharts.push(generateSampleTimeSeries(`${chartTitle} over time`, Math.floor(Math.random() * 3 + 4))); - } - constructedCharts.push(generateSampleBarChart('Bar Sample', 7)); - constructedCharts.push(generateSampleTableChart('Top Search Results')); - - return { - title, - charts: constructedCharts, - }; -} - -export const sampleHighlights = [ - { - value: 42, - title: 'Weekly Active Users', - body: '22% increase vs last week', - }, -]; - -export const sampleCharts: AnalyticsChartGroup[] = [ - generateSampleChartGroup('Overview', 3), - generateSampleChartGroup('Searches', 6), - generateSampleChartGroup('Entity Detail', 4), -]; diff --git a/datahub-web-react/src/app/entity/domain/preview/Preview.tsx b/datahub-web-react/src/app/entity/domain/preview/Preview.tsx index d31b5bc762b70..ae021c83bf977 100644 --- a/datahub-web-react/src/app/entity/domain/preview/Preview.tsx +++ b/datahub-web-react/src/app/entity/domain/preview/Preview.tsx @@ -21,7 +21,6 @@ export const Preview = ({ logoComponent?: JSX.Element; }): JSX.Element => { const entityRegistry = useEntityRegistry(); - console.log(`Find a way to use count ${count}`); return ( ); }; diff --git a/datahub-web-react/src/app/entity/group/Group.tsx b/datahub-web-react/src/app/entity/group/Group.tsx index e8f39f0776eba..be5d69fe7dc5f 100644 --- a/datahub-web-react/src/app/entity/group/Group.tsx +++ 
b/datahub-web-react/src/app/entity/group/Group.tsx @@ -51,7 +51,7 @@ export class GroupEntity implements Entity { renderPreview = (_: PreviewType, data: CorpGroup) => ( @@ -62,7 +62,7 @@ export class GroupEntity implements Entity { }; displayName = (data: CorpGroup) => { - return data.info?.displayName || data.name; + return data.properties?.displayName || data.info?.displayName || data.name; }; getGenericEntityProperties = (group: CorpGroup) => { diff --git a/datahub-web-react/src/app/entity/shared/components/styled/ExpandedOwner.tsx b/datahub-web-react/src/app/entity/shared/components/styled/ExpandedOwner.tsx index 82851acaa1691..446e734f4eac9 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/ExpandedOwner.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/ExpandedOwner.tsx @@ -37,7 +37,8 @@ export const ExpandedOwner = ({ entityUrn, owner, refetch }: Props) => { name = entityRegistry.getDisplayName(EntityType.CorpUser, owner.owner); } - const pictureLink = (owner.owner.__typename === 'CorpUser' && owner.owner.editableInfo?.pictureLink) || undefined; + const pictureLink = + (owner.owner.__typename === 'CorpUser' && owner.owner.editableProperties?.pictureLink) || undefined; const onDelete = async () => { try { diff --git a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearch.tsx b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearch.tsx index d1b6802c5ff61..e2541f5bf5691 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearch.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearch.tsx @@ -2,6 +2,7 @@ import React, { useState } from 'react'; import * as QueryString from 'query-string'; import { useHistory, useLocation, useParams } from 'react-router'; import { message } from 'antd'; +import styled from 'styled-components'; import { ApolloError } from '@apollo/client'; import { 
useEntityRegistry } from '../../../../../useEntityRegistry'; @@ -15,6 +16,11 @@ import EmbeddedListSearchHeader from './EmbeddedListSearchHeader'; import { useGetSearchResultsForMultipleQuery } from '../../../../../../graphql/search.generated'; import { GetSearchResultsParams, SearchResultInterface } from './types'; +const Container = styled.div` + overflow: scroll; + height: 120; +`; + // this extracts the response from useGetSearchResultsForMultipleQuery into a common interface other search endpoints can also produce function useWrappedSearchResults(params: GetSearchResultsParams) { const { data, loading, error } = useGetSearchResultsForMultipleQuery(params); @@ -134,7 +140,7 @@ export const EmbeddedListSearch = ({ const filteredFilters = data?.facets?.filter((facet) => facet.field !== fixedFilter?.field) || []; return ( - <> + {error && message.error(`Failed to complete search: ${error && error.message}`)} - + ); }; diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx index 4ca99dc746e04..b7a9c4ba6761b 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx @@ -86,10 +86,10 @@ const TabContent = styled.div` const resizerStyles = { background: '#E9E9E9', - width: '2px', + width: '1px', cursor: 'col-resize', margin: '0 5px', - height: '100%', + height: 'auto', }; const defaultTabDisplayConfig = { @@ -239,10 +239,14 @@ export const EntityProfile = ({ ) : ( diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/AddOwnerModal.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/AddOwnerModal.tsx index 6c5a5da295adb..fa01ad24c2d23 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/AddOwnerModal.tsx +++ 
b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/AddOwnerModal.tsx @@ -177,7 +177,6 @@ export const AddOwnerModal = ({ visible, onClose, refetch }: Props) => { useEnterKeyListener({ querySelectorToExecuteClick: '#addOwnerButton', }); - return ( { setData({ ...data, name: event.target.value })} + /> + + Title/Role} + rules={[{ whitespace: true }, { min: 2, max: 50 }]} + hasFeedback + > + setData({ ...data, title: event.target.value })} + /> + + Image URL} + rules={[{ whitespace: true }, { type: 'url', message: 'not valid url' }]} + hasFeedback + > + setData({ ...data, image: event.target.value })} + /> + + Team} + rules={[{ whitespace: true }, { min: 2, max: 50 }]} + > + setData({ ...data, team: event.target.value })} + /> + + Email} + rules={[ + { + required: true, + message: 'Enter your email', + }, + { + type: 'email', + message: 'Please enter valid email', + }, + { whitespace: true }, + { min: 2, max: 50 }, + ]} + hasFeedback + > + setData({ ...data, email: event.target.value })} + /> + + Slack} + rules={[{ whitespace: true }, { min: 2, max: 50 }]} + hasFeedback + > + setData({ ...data, slack: event.target.value })} + /> + + Phone} + rules={[ + { + pattern: new RegExp('^(?=.*[0-9])[- +()0-9]+$'), + message: 'not valid phone number', + }, + { + min: 5, + max: 15, + }, + ]} + hasFeedback + > + setData({ ...data, phone: event.target.value })} + /> + + + + ); +} diff --git a/datahub-web-react/src/app/entity/user/UserGroups.tsx b/datahub-web-react/src/app/entity/user/UserGroups.tsx index 39d03ad12b38e..bfc621ad4f8be 100644 --- a/datahub-web-react/src/app/entity/user/UserGroups.tsx +++ b/datahub-web-react/src/app/entity/user/UserGroups.tsx @@ -1,10 +1,10 @@ -import { List, Pagination, Row, Space, Typography } from 'antd'; +import { Col, Pagination, Row, Tooltip } from 'antd'; import React, { useState } from 'react'; +import { Link } from 'react-router-dom'; import styled from 'styled-components'; import { useGetUserGroupsLazyQuery } from 
'../../../graphql/user.generated'; import { CorpGroup, EntityRelationshipsResult, EntityType } from '../../../types.generated'; import { useEntityRegistry } from '../../useEntityRegistry'; -import { PreviewType } from '../Entity'; type Props = { urn: string; @@ -12,30 +12,68 @@ type Props = { pageSize: number; }; -const GroupList = styled(List)` - &&& { +const GroupsViewWrapper = styled.div` + height: calc(100vh - 173px); + overflow-y: auto; + + .user-group-pagination { + justify-content: center; + bottom: 24px; + position: absolute; width: 100%; - border-color: ${(props) => props.theme.styles['border-color-base']}; - margin-top: 12px; - margin-bottom: 28px; - padding: 24px 32px; - box-shadow: ${(props) => props.theme.styles['box-shadow']}; + left: 50%; + -webkit-transform: translateX(-50%); + -moz-transform: translateX(-50%); + -webkit-transform: translateX(-50%); + -ms-transform: translateX(-50%); + transform: translateX(-50%); } - & li { - padding-top: 28px; - padding-bottom: 28px; +`; + +const GroupItemColumn = styled(Col)` + padding: 10px; +`; + +const GroupItem = styled.div` + border: 1px solid #eaeaea; + padding: 10px; + min-height: 107px; + max-height: 107px; + border-radius: 5px; + + .title-row { + padding: 9px 11px 9px 11px; } - & li:not(:last-child) { - border-bottom: 1.5px solid #ededed; + .description-row { + padding: 2px 13px; } `; -const GroupsView = styled(Space)` - width: 100%; - margin-bottom: 32px; - padding-top: 28px; +const GroupTitle = styled.span` + font-size: 14px; + line-height: 22px; + font-weight: bold; + color: #262626; `; +const GroupMember = styled.span` + font-weight: 500; + font-size: 12px; + line-height: 23px; + color: #8c8c8c; + padding-left: 7px; +`; + +const GroupDescription = styled.span` + font-weight: 500; + font-size: 12px; + line-height: 20px; + color: #262626; + overflow: hidden; + text-overflow: ellipsis; + max-width: 100%; + height: 43px; +`; export default function UserGroups({ urn, initialRelationships, pageSize }: 
Props) { const [page, setPage] = useState(1); const entityRegistry = useEntityRegistry(); @@ -53,19 +91,35 @@ export default function UserGroups({ urn, initialRelationships, pageSize }: Prop const userGroups = relationships?.relationships?.map((rel) => rel.entity as CorpGroup) || []; return ( - - Group Membership - - ( - - {entityRegistry.renderPreview(EntityType.CorpGroup, PreviewType.PREVIEW, item)} - - )} - bordered - /> + + + {userGroups && + userGroups.map((item) => { + return ( + + + + + {item.info?.displayName || item.name} + + {item.relationships?.total} + {item.relationships?.total === 1 ? ' member' : ' members'} + + + + + + {item.info?.description} + + + + + + + ); + })} + + - + ); } diff --git a/datahub-web-react/src/app/entity/user/UserInfoSideBar.tsx b/datahub-web-react/src/app/entity/user/UserInfoSideBar.tsx new file mode 100644 index 0000000000000..64950dd0a5f98 --- /dev/null +++ b/datahub-web-react/src/app/entity/user/UserInfoSideBar.tsx @@ -0,0 +1,337 @@ +import { Divider, message, Space, Button, Tag, Typography } from 'antd'; +import React, { useState } from 'react'; +import styled from 'styled-components'; +import { EditOutlined, MailOutlined, PhoneOutlined, SlackOutlined } from '@ant-design/icons'; +import { Link } from 'react-router-dom'; +import { useUpdateCorpUserPropertiesMutation } from '../../../graphql/user.generated'; +import { EntityType } from '../../../types.generated'; + +import UserEditProfileModal from './UserEditProfileModal'; +import { ExtendedEntityRelationshipsResult } from './type'; +import CustomAvatar from '../../shared/avatar/CustomAvatar'; +import { useEntityRegistry } from '../../useEntityRegistry'; +import { useGetAuthenticatedUser } from '../../useGetAuthenticatedUser'; + +const { Paragraph } = Typography; + +type SideBarData = { + photoUrl: string | undefined; + avatarName: string | undefined; + name: string | undefined; + role: string | undefined; + team: string | undefined; + email: string | undefined; + slack: 
string | undefined; + phone: string | undefined; + aboutText: string | undefined; + groupsDetails: ExtendedEntityRelationshipsResult; + urn: string | undefined; +}; + +type Props = { + sideBarData: SideBarData; + refetch: () => void; +}; + +const AVATAR_STYLE = { marginTop: '14px' }; + +/** + * Styled Components + */ +export const SideBar = styled.div` + padding: 0 0 0 17px; + text-align: center; + + font-style: normal; + font-weight: bold; + height: calc(100vh - 60px); + position: relative; + + &&& .ant-avatar.ant-avatar-icon { + font-size: 46px !important; + } + + .divider-infoSection { + margin: 18px 0px 18px 0; + } + .divider-aboutSection { + margin: 23px 0px 11px 0; + } + .divider-groupsSection { + margin: 23px 0px 11px 0; + } +`; + +export const SideBarSubSection = styled.div` + height: calc(100vh - 135px); + overflow: auto; + padding-right: 18px; + &.fullView { + height: calc(100vh - 70px); + } + &::-webkit-scrollbar { + height: 12px; + width: 1px; + background: #d6d6d6; + } + &::-webkit-scrollbar-thumb { + background: #d6d6d6; + -webkit-border-radius: 1ex; + -webkit-box-shadow: 0px 1px 2px rgba(0, 0, 0, 0.75); + } +`; + +export const EmptyValue = styled.div` + &:after { + content: 'None'; + color: #b7b7b7; + font-style: italic; + font-weight: 100; + } +`; + +export const Name = styled.div` + font-size: 20px; + line-height: 28px; + color: #262626; + margin: 13px 0 7px 0; +`; + +export const Role = styled.div` + font-size: 14px; + line-height: 22px; + color: #595959; + margin-bottom: 7px; +`; + +export const Team = styled.div` + font-size: 12px; + line-height: 20px; + color: #8c8c8c; +`; + +export const SocialDetails = styled.div` + font-size: 12px; + line-height: 20px; + color: #262626; + text-align: left; + margin: 6px 0; +`; + +export const EditProfileButton = styled.div` + bottom: 24px; + position: absolute; + right: 27px; + width: 80%; + left: 50%; + -webkit-transform: translateX(-50%); + -moz-transform: translateX(-50%); + transform: translateX(-50%); + 
+ button { + width: 100%; + font-size: 12px; + line-height: 20px; + color: #262626; + } +`; + +export const AboutSection = styled.div` + text-align: left; + font-weight: bold; + font-size: 14px; + line-height: 22px; + color: #262626; +`; + +export const AboutSectionText = styled.div` + font-size: 12px; + font-weight: 100; + line-height: 15px; + padding: 5px 0; + + &&& .ant-typography { + margin-bottom: 0; + } + &&& .ant-typography-edit-content { + padding-left: 15px; + padding-top: 5px; + } +`; + +export const GroupsSection = styled.div` + text-align: left; + font-weight: bold; + font-size: 14px; + line-height: 22px; + color: #262626; +`; + +export const TagsSection = styled.div` + height: calc(75vh - 460px); + padding: 5px; +`; + +export const NoDataFound = styled.span` + font-size: 12px; + color: #262626; + font-weight: 100; +`; + +export const Tags = styled.div` + margin-top: 5px; +`; + +export const GroupsSeeMoreText = styled.span` + font-weight: 500; + font-size: 12px; + line-height: 20px; + color: #1890ff; + cursor: pointer; +`; + +/** + * Responsible for reading & writing users. 
+ */ +export default function UserInfoSideBar({ sideBarData, refetch }: Props) { + const { name, aboutText, avatarName, email, groupsDetails, phone, photoUrl, role, slack, team, urn } = sideBarData; + + const [updateCorpUserPropertiesMutation] = useUpdateCorpUserPropertiesMutation(); + const entityRegistry = useEntityRegistry(); + + const [groupSectionExpanded, setGroupSectionExpanded] = useState(false); + const [editProfileModal, showEditProfileModal] = useState(false); + /* eslint-disable @typescript-eslint/no-unused-vars */ + const me = useGetAuthenticatedUser(); + const isProfileOwner = me?.corpUser?.urn === urn; + + const getEditModalData = { + urn, + name, + title: role, + team, + email, + image: photoUrl, + slack, + phone, + }; + + // About Text save + const onSaveAboutMe = (inputString) => { + updateCorpUserPropertiesMutation({ + variables: { + urn: urn || '', + input: { + aboutMe: inputString, + }, + }, + }) + .catch((e) => { + message.destroy(); + message.error({ content: `Failed to Save changes!: \n ${e.message || ''}`, duration: 3 }); + }) + .finally(() => { + message.success({ + content: `Changes saved.`, + duration: 3, + }); + refetch(); + }); + }; + return ( + <> + + + + {name || } + {role && {role}} + {team && {team}} + + + + + {email || } + + + + + + {slack || } + + + + + + {phone || } + + + + + About + + + {aboutText || } + + + + + + Groups + + {groupsDetails?.relationships.length === 0 && } + {!groupSectionExpanded && + groupsDetails?.relationships.slice(0, 2).map((item) => { + return ( + + + + {entityRegistry.getDisplayName(EntityType.CorpGroup, item.entity)} + + + + ); + })} + {groupSectionExpanded && + groupsDetails?.relationships.length > 2 && + groupsDetails?.relationships.map((item) => { + return ( + + + + {entityRegistry.getDisplayName(EntityType.CorpGroup, item.entity)} + + + + ); + })} + {!groupSectionExpanded && groupsDetails?.relationships.length > 2 && ( + setGroupSectionExpanded(!groupSectionExpanded)}> + 
{`+${groupsDetails?.relationships.length - 2} more`} + + )} + + + + {isProfileOwner && ( + + + + )} + + {/* Modal */} + showEditProfileModal(false)} + onSave={() => { + refetch(); + }} + editModalData={getEditModalData} + /> + + ); +} diff --git a/datahub-web-react/src/app/entity/user/UserProfile.tsx b/datahub-web-react/src/app/entity/user/UserProfile.tsx index fedc0f3f61059..f5bae0bc1f49d 100644 --- a/datahub-web-react/src/app/entity/user/UserProfile.tsx +++ b/datahub-web-react/src/app/entity/user/UserProfile.tsx @@ -1,73 +1,81 @@ -import { Alert } from 'antd'; -import React, { useMemo } from 'react'; -import UserHeader from './UserHeader'; +import { Alert, Col, Row } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; import useUserParams from '../../shared/entitySearch/routingUtils/useUserParams'; import { useGetUserQuery } from '../../../graphql/user.generated'; -import { useGetAllEntitySearchResults } from '../../../utils/customGraphQL/useGetAllEntitySearchResults'; -import { Message } from '../../shared/Message'; -import RelatedEntityResults from '../../shared/entitySearch/RelatedEntityResults'; -import { LegacyEntityProfile } from '../../shared/LegacyEntityProfile'; -import { CorpUser, EntityType, SearchResult, EntityRelationshipsResult } from '../../../types.generated'; +import { EntityRelationshipsResult } from '../../../types.generated'; import UserGroups from './UserGroups'; -import { useEntityRegistry } from '../../useEntityRegistry'; +import { RoutedTabs } from '../../shared/RoutedTabs'; +import { UserAssets } from './UserAssets'; +import { ExtendedEntityRelationshipsResult } from './type'; import { decodeUrn } from '../shared/utils'; +import UserInfoSideBar from './UserInfoSideBar'; -const messageStyle = { marginTop: '10%' }; +export interface Props { + onTabChange: (selectedTab: string) => void; +} export enum TabType { - Ownership = 'Ownership', + Assets = 'Assets', Groups = 'Groups', } -const ENABLED_TAB_TYPES = 
[TabType.Ownership, TabType.Groups]; +const ENABLED_TAB_TYPES = [TabType.Assets, TabType.Groups]; const GROUP_PAGE_SIZE = 20; +/** + * Styled Components + */ +const UserProfileWrapper = styled.div` + &&& .ant-tabs-nav { + margin: 0; + } +`; + +const Content = styled.div` + color: #262626; + height: calc(100vh - 60px); + + &&& .ant-tabs > .ant-tabs-nav .ant-tabs-nav-wrap { + padding-left: 15px; + } +`; + +export const EmptyValue = styled.div` + &:after { + content: 'None'; + color: #b7b7b7; + font-style: italic; + font-weight: 100; + } +`; + /** * Responsible for reading & writing users. */ export default function UserProfile() { const { urn: encodedUrn } = useUserParams(); const urn = decodeUrn(encodedUrn); - const { loading, error, data } = useGetUserQuery({ variables: { urn, groupsCount: GROUP_PAGE_SIZE } }); - const entityRegistry = useEntityRegistry(); - const username = data?.corpUser?.username; - - const ownershipResult = useGetAllEntitySearchResults({ - query: `owners:${username}`, - }); - - const contentLoading = - Object.keys(ownershipResult).some((type) => { - return ownershipResult[type].loading; - }) || loading; - - const ownershipForDetails = useMemo(() => { - const filteredOwnershipResult: { - [key in EntityType]?: Array; - } = {}; - Object.keys(ownershipResult).forEach((type) => { - const entities = ownershipResult[type].data?.search?.searchResults; + const { loading, error, data, refetch } = useGetUserQuery({ variables: { urn, groupsCount: GROUP_PAGE_SIZE } }); - if (entities && entities.length > 0) { - filteredOwnershipResult[type] = ownershipResult[type].data?.search?.searchResults; - } - }); - return filteredOwnershipResult; - }, [ownershipResult]); + const groupMemberRelationships = data?.corpUser?.relationships as EntityRelationshipsResult; + const groupsDetails = data?.corpUser?.relationships as ExtendedEntityRelationshipsResult; if (error || (!loading && !error && !data)) { return ; } - const groupMemberRelationships = 
data?.corpUser?.relationships as EntityRelationshipsResult; - + // Routed Tabs Constants const getTabs = () => { return [ { - name: TabType.Ownership, - path: TabType.Ownership.toLocaleLowerCase(), - content: , + name: TabType.Assets, + path: TabType.Assets.toLocaleLowerCase(), + content: , + display: { + enabled: () => true, + }, }, { name: TabType.Groups, @@ -75,36 +83,47 @@ export default function UserProfile() { content: ( ), + display: { + enabled: () => groupsDetails?.relationships.length > 0, + }, }, ].filter((tab) => ENABLED_TAB_TYPES.includes(tab.name)); }; + const defaultTabPath = getTabs() && getTabs()?.length > 0 ? getTabs()[0].path : ''; + const onTabChange = () => null; - const getHeader = (user: CorpUser) => { - const { editableInfo, info } = user; - const displayName = entityRegistry.getDisplayName(EntityType.CorpUser, user); - return ( - - ); + // Side bar data + const sideBarData = { + photoUrl: data?.corpUser?.editableProperties?.pictureLink || undefined, + avatarName: + data?.corpUser?.editableProperties?.displayName || + data?.corpUser?.info?.displayName || + data?.corpUser?.info?.fullName || + data?.corpUser?.urn, + name: data?.corpUser?.editableProperties?.displayName || data?.corpUser?.info?.fullName || undefined, + role: data?.corpUser?.editableProperties?.title || data?.corpUser?.info?.title || undefined, + team: data?.corpUser?.editableProperties?.teams?.join(',') || undefined, + email: data?.corpUser?.editableProperties?.email || data?.corpUser?.info?.email || undefined, + slack: data?.corpUser?.editableProperties?.slack || undefined, + phone: data?.corpUser?.editableProperties?.phone || undefined, + aboutText: data?.corpUser?.editableProperties?.aboutMe || undefined, + groupsDetails: data?.corpUser?.relationships as ExtendedEntityRelationshipsResult, + urn, }; - return ( <> - {contentLoading && } - {data && data.corpUser && ( - - )} + + + + + + + + + + + + ); } diff --git a/datahub-web-react/src/app/entity/user/type.ts 
b/datahub-web-react/src/app/entity/user/type.ts new file mode 100644 index 0000000000000..ab027e9becc1f --- /dev/null +++ b/datahub-web-react/src/app/entity/user/type.ts @@ -0,0 +1,14 @@ +import { EntityRelationshipsResult, EntityRelationship, Entity, CorpGroupProperties } from '../../../types.generated'; + +export interface ExtendedEntityRelationshipsResult extends EntityRelationshipsResult { + relationships: Array; +} + +interface ExtendedEntityRelationship extends EntityRelationship { + entity: ExtendedEntity; +} + +interface ExtendedEntity extends Entity { + info: CorpGroupProperties; + name: string; +} diff --git a/datahub-web-react/src/app/home/HomePageHeader.tsx b/datahub-web-react/src/app/home/HomePageHeader.tsx index f2655d8efd2af..c3c7e6c9688af 100644 --- a/datahub-web-react/src/app/home/HomePageHeader.tsx +++ b/datahub-web-react/src/app/home/HomePageHeader.tsx @@ -204,7 +204,7 @@ export const HomePageHeader = () => { {!!user && ( <> - Welcome back, {user.info?.firstName || user.username}. + Welcome back, {entityRegistry.getDisplayName(EntityType.CorpUser, user)}. )} @@ -212,8 +212,8 @@ export const HomePageHeader = () => { @@ -242,7 +242,7 @@ export const HomePageHeader = () => { onClick={() => navigateToSearchUrl({ type: undefined, - query: suggestion, + query: `"${suggestion}"`, history, }) } diff --git a/datahub-web-react/src/app/identity/user/UserListItem.tsx b/datahub-web-react/src/app/identity/user/UserListItem.tsx index dde6b14d94738..3e7d3a88ee681 100644 --- a/datahub-web-react/src/app/identity/user/UserListItem.tsx +++ b/datahub-web-react/src/app/identity/user/UserListItem.tsx @@ -96,7 +96,11 @@ export default function UserListItem({ user, onDelete }: Props) { - +
{displayName} diff --git a/datahub-web-react/src/app/search/SearchFilterLabel.tsx b/datahub-web-react/src/app/search/SearchFilterLabel.tsx index 5f6bcdfa7223f..865982da42145 100644 --- a/datahub-web-react/src/app/search/SearchFilterLabel.tsx +++ b/datahub-web-react/src/app/search/SearchFilterLabel.tsx @@ -11,12 +11,16 @@ import { EntityType, GlossaryTerm, Tag as TagType, + CorpUser, + CorpGroup, } from '../../types.generated'; import { StyledTag } from '../entity/shared/components/styled/StyledTag'; import { capitalizeFirstLetter } from '../shared/textUtil'; import { DomainLink } from '../shared/tags/DomainLink'; import { useEntityRegistry } from '../useEntityRegistry'; import { ENTITY_FILTER_NAME } from './utils/constants'; +import CustomAvatar from '../shared/avatar/CustomAvatar'; +import { IconStyleType } from '../entity/Entity'; type Props = { aggregation: AggregationMetadata; @@ -56,13 +60,44 @@ export const SearchFilterLabel = ({ aggregation, field }: Props) => { ); } + if (aggregation.entity?.type === EntityType.CorpUser) { + const user = aggregation.entity as CorpUser; + const displayName = entityRegistry.getDisplayName(EntityType.CorpUser, user); + return ( + <> + + {displayName} ({countText}) + + ); + } + + if (aggregation.entity?.type === EntityType.CorpGroup) { + const group = aggregation.entity as CorpGroup; + return ( + <> + + {entityRegistry.getIcon(EntityType.CorpGroup, 16, IconStyleType.ACCENT)} + + {entityRegistry.getDisplayName(EntityType.CorpGroup, group)} ({countText}) + + ); + } + if (aggregation.entity?.type === EntityType.GlossaryTerm) { const term = aggregation.entity as GlossaryTerm; return ( <> - {term.name} + {entityRegistry.getDisplayName(EntityType.GlossaryTerm, term)} ({countText}) @@ -101,7 +136,8 @@ export const SearchFilterLabel = ({ aggregation, field }: Props) => { const domain = aggregation.entity as Domain; return ( <> - + ( + {countText}) ); } diff --git a/datahub-web-react/src/app/search/SearchablePage.tsx 
b/datahub-web-react/src/app/search/SearchablePage.tsx index f4167b1ec170c..3097caaa07d4d 100644 --- a/datahub-web-react/src/app/search/SearchablePage.tsx +++ b/datahub-web-react/src/app/search/SearchablePage.tsx @@ -86,7 +86,7 @@ export const SearchablePage = ({ initialQuery, onSearch, onAutoComplete, childre onSearch={onSearch || search} onQueryChange={onAutoComplete || autoComplete} authenticatedUserUrn={user?.urn || ''} - authenticatedUserPictureLink={user?.editableInfo?.pictureLink} + authenticatedUserPictureLink={user?.editableProperties?.pictureLink} entityRegistry={entityRegistry} />
{children}
diff --git a/datahub-web-react/src/app/search/utils/constants.ts b/datahub-web-react/src/app/search/utils/constants.ts index 889c15d1a7055..b41575135ba76 100644 --- a/datahub-web-react/src/app/search/utils/constants.ts +++ b/datahub-web-react/src/app/search/utils/constants.ts @@ -4,6 +4,15 @@ export const SEARCH_FOR_ENTITY_PREFIX = 'SEARCH__'; export const ENTITY_FILTER_NAME = 'entity'; export const TAG_FILTER_NAME = 'tags'; export const GLOSSARY_FILTER_NAME = 'glossaryTerms'; +export const CONTAINER_FILTER_NAME = 'container'; +export const DOMAINS_FILTER_NAME = 'domains'; +export const OWNERS_FILTER_NAME = 'owners'; -export const FILTERS_TO_TRUNCATE = [TAG_FILTER_NAME, GLOSSARY_FILTER_NAME]; +export const FILTERS_TO_TRUNCATE = [ + TAG_FILTER_NAME, + GLOSSARY_FILTER_NAME, + CONTAINER_FILTER_NAME, + DOMAINS_FILTER_NAME, + OWNERS_FILTER_NAME, +]; export const TRUNCATED_FILTER_LENGTH = 5; diff --git a/datahub-web-react/src/app/shared/RoutedTabs.tsx b/datahub-web-react/src/app/shared/RoutedTabs.tsx index d95cd84325578..b1c0749a91a17 100644 --- a/datahub-web-react/src/app/shared/RoutedTabs.tsx +++ b/datahub-web-react/src/app/shared/RoutedTabs.tsx @@ -12,6 +12,9 @@ interface Props extends TabsProps { name: string; path: string; content: React.ReactNode; + display?: { + enabled: () => boolean; + }; }>; onTabChange?: (selectedTab: string) => void; } @@ -39,9 +42,11 @@ export const RoutedTabs = ({ defaultPath, tabs, onTabChange, ...props }: Props) onChange={(newPath) => history.push(`${url}/${newPath}`)} {...props} > - {tabs.map((tab) => ( - - ))} + {tabs.map((tab) => { + return ( + + ); + })} diff --git a/datahub-web-react/src/app/shared/avatar/AvatarsGroup.tsx b/datahub-web-react/src/app/shared/avatar/AvatarsGroup.tsx index c6e7a45baa6df..262cd7cecdfaa 100644 --- a/datahub-web-react/src/app/shared/avatar/AvatarsGroup.tsx +++ b/datahub-web-react/src/app/shared/avatar/AvatarsGroup.tsx @@ -24,20 +24,19 @@ export default function AvatarsGroup({ owners, entityRegistry, 
maxCount = 6, siz {owner.owner.__typename === 'CorpUser' ? ( ) : ( owner.owner.__typename === 'CorpGroup' && ( ` color: #fff; background-color: ${(props) => props.$backgroundColor}; - font-size: ${(props) => (props.size ? `${Math.max(props.size / 2.0, 14)}px` : '14px')} !important; + font-size: ${(props) => (props.size ? `${Math.max(props.size / 2.0, 12)}px` : '14px')} !important; margin-right: 4px; height: 24px; width: 24px; .ant-avatar-string { text-align: center; - top: ${(props) => ((props.size || 0) < 24 ? '-4' : '0')}px; + top: 0px; + line-height: ${(props) => (props.size ? props.size : 24)}px; } `; diff --git a/datahub-web-react/src/graphql/analytics.graphql b/datahub-web-react/src/graphql/analytics.graphql index 40e6c106c9325..be16a8014f8da 100644 --- a/datahub-web-react/src/graphql/analytics.graphql +++ b/datahub-web-react/src/graphql/analytics.graphql @@ -4,38 +4,70 @@ query isAnalyticsEnabled { query getAnalyticsCharts { getAnalyticsCharts { + groupId title charts { - ... on TimeSeriesChart { - title - lines { - name - data { - x - y - } - } - dateRange { - start - end - } - interval + ...analyticsChart + } + } +} + +query getMetadataAnalyticsCharts($input: MetadataAnalyticsInput!) { + getMetadataAnalyticsCharts(input: $input) { + groupId + title + charts { + ...analyticsChart + } + } +} + +fragment analyticsChart on AnalyticsChart { + ... on TimeSeriesChart { + title + lines { + name + data { + x + y } - ... on BarChart { - title - bars { - name - segments { - label - value - } - } + } + dateRange { + start + end + } + interval + } + ... on BarChart { + title + bars { + name + segments { + label + value } - ... on TableChart { - title - columns - rows { - values + } + } + ... 
on TableChart { + title + columns + rows { + values + cells { + value + linkParams { + searchParams { + types + query + filters { + field + value + } + } + entityProfileParams { + urn + type + } } } } diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql index 4a0da5d7860c3..b4770542d0194 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -36,7 +36,9 @@ fragment ownershipFields on Ownership { lastName fullName } - editableInfo { + editableProperties { + displayName + title pictureLink } } diff --git a/datahub-web-react/src/graphql/group.graphql b/datahub-web-react/src/graphql/group.graphql index d424be738ba35..cd6a8292b166e 100644 --- a/datahub-web-react/src/graphql/group.graphql +++ b/datahub-web-react/src/graphql/group.graphql @@ -25,7 +25,9 @@ query getGroup($urn: String!, $membersCount: Int!) { lastName fullName } - editableInfo { + editableProperties { + displayName + title pictureLink } } diff --git a/datahub-web-react/src/graphql/me.graphql b/datahub-web-react/src/graphql/me.graphql index 20f4339567fd3..2b27992f343df 100644 --- a/datahub-web-react/src/graphql/me.graphql +++ b/datahub-web-react/src/graphql/me.graphql @@ -12,7 +12,9 @@ query getMe { fullName email } - editableInfo { + editableProperties { + displayName + title pictureLink teams skills diff --git a/datahub-web-react/src/graphql/preview.graphql b/datahub-web-react/src/graphql/preview.graphql index 74e05e1a662a7..a6ade1b83f3f9 100644 --- a/datahub-web-react/src/graphql/preview.graphql +++ b/datahub-web-react/src/graphql/preview.graphql @@ -45,7 +45,9 @@ fragment entityPreview on Entity { lastName fullName } - editableInfo { + editableProperties { + displayName + title pictureLink } } diff --git a/datahub-web-react/src/graphql/search.graphql b/datahub-web-react/src/graphql/search.graphql index 0a5ef5f9ecd46..5929094566e10 100644 --- 
a/datahub-web-react/src/graphql/search.graphql +++ b/datahub-web-react/src/graphql/search.graphql @@ -70,7 +70,9 @@ fragment searchResults on SearchResults { lastName fullName } - editableInfo { + editableProperties { + displayName + title pictureLink } } @@ -369,6 +371,25 @@ fragment searchResults on SearchResults { name } } + ... on CorpUser { + urn + username + properties { + displayName + fullName + } + editableProperties { + displayName + pictureLink + } + } + ... on CorpGroup { + urn + name + properties { + displayName + } + } } } } diff --git a/datahub-web-react/src/graphql/user.graphql b/datahub-web-react/src/graphql/user.graphql index c4ccd10f82a38..9cc318152dc2b 100644 --- a/datahub-web-react/src/graphql/user.graphql +++ b/datahub-web-react/src/graphql/user.graphql @@ -10,11 +10,18 @@ query getUser($urn: String!, $groupsCount: Int!) { lastName fullName email + departmentName } - editableInfo { + editableProperties { + slack + phone pictureLink + aboutMe teams skills + displayName + title + email } globalTags { ...globalTagsFields @@ -92,9 +99,11 @@ query listUsers($input: ListUsersInput!) 
{ fullName email } - editableInfo { + editableProperties { + displayName pictureLink teams + title skills } status diff --git a/datahub-web-react/src/images/clickhouselogo.png b/datahub-web-react/src/images/clickhouselogo.png new file mode 100644 index 0000000000000..89ee65329ea90 Binary files /dev/null and b/datahub-web-react/src/images/clickhouselogo.png differ diff --git a/docs-website/build.gradle b/docs-website/build.gradle index 5284b7063c209..353f39bc869c2 100644 --- a/docs-website/build.gradle +++ b/docs-website/build.gradle @@ -63,7 +63,7 @@ task generateGraphQLDocumentation(type: YarnTask, dependsOn: [yarnInstall, gener args = ['docusaurus', 'docs:generate:graphql'] } -task yarnGenerate(type: YarnTask, dependsOn: [yarnInstall, generateGraphQLDocumentation] ) { +task yarnGenerate(type: YarnTask, dependsOn: [yarnInstall, generateGraphQLDocumentation, ':metadata-ingestion:modelDocGen'] ) { inputs.files(projectMdFiles) outputs.cacheIf { true } args = ['run', 'generate'] diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index 29ce7269a42e6..ce9db79bfaa22 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -109,10 +109,6 @@ module.exports = { label: "Features", to: "docs/features", }, - { - label: "FAQs", - to: "docs/faq", - }, ], }, { @@ -189,6 +185,7 @@ module.exports = { path: "genDocs", sidebarPath: require.resolve("./sidebars.js"), editUrl: "https://github.com/linkedin/datahub/blob/master/", + numberPrefixParser: false, // TODO: make these work correctly with the doc generation showLastUpdateAuthor: true, showLastUpdateTime: true, diff --git a/docs-website/generateDocsDir.ts b/docs-website/generateDocsDir.ts index b976859fac74c..7141626235515 100644 --- a/docs-website/generateDocsDir.ts +++ b/docs-website/generateDocsDir.ts @@ -58,10 +58,33 @@ function accounted_for_in_sidebar(filepath: string): boolean { } function list_markdown_files(): string[] { - const all_markdown_files = 
execSync("cd .. && git ls-files . | grep '.md$'") + let all_markdown_files = execSync("git ls-files --full-name .. | grep '.md$'") .toString() .trim() .split("\n"); + let all_generated_markdown_files = execSync( + "cd .. && ls docs/generated/metamodel/**/*.md" + ) + .toString() + .trim() + .split("\n"); + all_markdown_files = [...all_markdown_files, ...all_generated_markdown_files]; + + if (!process.env.CI) { + // If not in CI, we also include "untracked" files. + const untracked_files = execSync( + "(git ls-files --full-name --others --exclude-standard .. | grep '.md$') || true" + ) + .toString() + .trim() + .split("\n") + .filter((filepath) => !all_generated_markdown_files.includes(filepath)); + + if (untracked_files.length > 0) { + console.log(`Including untracked files in docs list: ${untracked_files}`); + all_markdown_files = [...all_markdown_files, ...untracked_files]; + } + } const filter_patterns = [ // We don't need our issue and pull request templates. @@ -70,9 +93,10 @@ function list_markdown_files(): string[] { /^docs-website\//, // Don't want hosted docs for these. /^contrib\//, - // Keep main docs for kubernetes, but skip the inner docs + // Keep main docs for kubernetes, but skip the inner docs. /^datahub-kubernetes\//, - /^datahub-web\//, + // Various other docs/directories to ignore. + /^metadata-models\/docs\//, // these are used to generate docs, so we don't want to consider them here /^metadata-ingestion-examples\//, /^docker\/(?!README|datahub-upgrade|airflow\/local_airflow)/, // Drop all but a few docker docs. /^docs\/rfc\/templates\/000-template\.md$/, @@ -101,6 +125,10 @@ const hardcoded_slugs = { }; function get_slug(filepath: string): string { + // The slug is the URL path to the page. + // In the actual site, all slugs are prefixed with /docs. + // There's no need to do this cleanup, but it does make the URLs a bit more aesthetic. 
+ if (filepath in hardcoded_slugs) { return hardcoded_slugs[filepath]; } @@ -256,6 +284,7 @@ function new_url(original: string, filepath: string): string { ".sh", ".env", ".sql", + // Using startsWith since some URLs will be .ext#LINENO ].some((ext) => suffix.startsWith(ext)) ) { // A reference to a file or directory in the Github repo. @@ -342,6 +371,12 @@ function markdown_sanitize_and_linkify(content: string): string { "[#$1](https://github.com/linkedin/datahub/pull/$1)" ); + // Prettify bare links to PRs. + content = content.replace( + /(\s+)(https:\/\/github\.com\/linkedin\/datahub\/pull\/(\d+))(\s+|$)/g, + "$1[#$3]($2)$4" + ); + return content; } @@ -364,7 +399,9 @@ slug: /releases custom_edit_url: https://github.com/linkedin/datahub/blob/master/docs-website/generateDocsDir.ts --- -# DataHub Releases\n\n`); +# DataHub Releases + +## Summary\n\n`); const releases_list = await octokit.rest.repos.listReleases({ owner: "linkedin", @@ -464,7 +501,7 @@ function write_markdown_file( } // Error if a doc is not accounted for in a sidebar. 
- const autogenerated_sidebar_directories = ["docs/rfc/active/"]; + const autogenerated_sidebar_directories = ["docs/generated/metamodel"]; for (const filepath of markdown_files) { if ( autogenerated_sidebar_directories.some((dir) => filepath.startsWith(dir)) diff --git a/docs-website/package.json b/docs-website/package.json index 7a9675eb1662f..932ed33833fb5 100644 --- a/docs-website/package.json +++ b/docs-website/package.json @@ -46,4 +46,4 @@ "ts-node": "^9.1.1", "typescript": "^4.1.5" } -} \ No newline at end of file +} diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 069514ee0a61a..3974d3b12adb5 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -15,10 +15,7 @@ function list_ids_in_directory(directory, hardcoded_labels) { } else { if (name.endsWith(".md")) { const slug = name.replace(/\.md$/, ""); - let id = `${directory}/${slug}`; - if (id.match(/\/\d+-.+/)) { - id = id.replace(/\/\d+-/, "/"); - } + const id = `${directory}/${slug}`; if (id in hardcoded_labels) { label = hardcoded_labels[id]; @@ -62,7 +59,12 @@ module.exports = { "docs/saas", "releases", ], - "Getting Started": ["docs/quickstart", "docs/cli", "docs/debugging"], + "Getting Started": [ + "docs/quickstart", + "docs/cli", + "docs/debugging", + "docs/how/search", + ], "Metadata Ingestion": [ // add a custom label since the default is 'Metadata Ingestion' // note that we also have to add the path to this file in sidebarsjs_hardcoded_titles in generateDocsDir.ts @@ -115,6 +117,14 @@ module.exports = { "Metadata Modeling": [ "docs/modeling/metadata-model", "docs/modeling/extending-the-metadata-model", + { + Entities: [ + { + type: "autogenerated", + dirName: "docs/generated/metamodel/entities", // '.' means the current docs folder + }, + ], + }, // TODO: change the titles of these, removing the "What is..." 
portion from the sidebar" // "docs/what/entity", // "docs/what/aspect", diff --git a/docs-website/src/components/Logos.js b/docs-website/src/components/Logos.js index 0edb0cfa1456c..adfd95c601d85 100644 --- a/docs-website/src/components/Logos.js +++ b/docs-website/src/components/Logos.js @@ -1,110 +1,132 @@ import React from "react"; import clsx from "clsx"; +import Tabs from "@theme/Tabs"; +import TabItem from "@theme/TabItem"; import Link from "@docusaurus/Link"; import useBaseUrl from "@docusaurus/useBaseUrl"; import styles from "../styles/logos.module.scss"; -const companyLogos = [ - { - name: "LinkedIn", - imageUrl: "/img/logos/companies/linkedin.svg", - size: "small", - }, - { - name: "Expedia Group", - imageUrl: "/img/logos/companies/expedia.svg", - size: "default", - }, - { - name: "Saxo Bank", - imageUrl: "/img/logos/companies/saxobank.svg", - size: "default", - }, - { - name: "Zynga", - imageUrl: "/img/logos/companies/zynga.png", - size: "default", - }, - { - name: "Grofers", - imageUrl: "/img/logos/companies/grofers.png", - size: "default", - }, - { - name: "Typeform", - imageUrl: "/img/logos/companies/typeform.svg", - size: "default", - }, - { - name: "Peloton", - imageUrl: "/img/logos/companies/peloton.png", - size: "large", - }, - { - name: "Optum", - imageUrl: "/img/logos/companies/optum.jpg", - size: "default", - }, - { - name: "SpotHero", - imageUrl: "/img/logos/companies/spothero.png", - size: "default", - }, - { - name: "Geotab", - imageUrl: "/img/logos/companies/geotab.jpg", - size: "small", - }, - { - name: "ThoughtWorks", - imageUrl: "/img/logos/companies/thoughtworks.png", - size: "large", - }, - { - name: "Viasat", - imageUrl: "/img/logos/companies/viasat.png", - size: "large", - }, - { - name: "Klarna", - imageUrl: "/img/logos/companies/klarna.svg", - size: "small", - }, - { - name: "Wolt", - imageUrl: "/img/logos/companies/wolt.png", - size: "large", - }, - { - name: "DFDS", - imageUrl: "/img/logos/companies/dfds.png", - size: "default", - 
}, - { - name: "BankSalad", - imageUrl: "/img/logos/companies/banksalad.png", - size: "large", - }, - { - name: "Uphold", - imageUrl: "/img/logos/companies/uphold.png", - size: "large", - }, - { - name: "hipages", - imageUrl: "/img/logos/companies/hipages.png", - size: "default", - }, - { - name: "Moloco", - imageUrl: "/img/logos/companies/moloco.png", - size: "default", - }, - { - name: "Stash", - imageUrl: "/img/logos/companies/stash.svg", - size: "default", +const companiesByIndustry = [ + { + name: "B2B & B2C", + companies: [ + { + name: "LinkedIn", + imageUrl: "/img/logos/companies/linkedin.svg", + size: "small", + }, + { + name: "Geotab", + imageUrl: "/img/logos/companies/geotab.jpg", + size: "small", + }, + { + name: "ThoughtWorks", + imageUrl: "/img/logos/companies/thoughtworks.png", + size: "default", + }, + { + name: "Expedia Group", + imageUrl: "/img/logos/companies/expedia.svg", + size: "default", + }, + { + name: "Typeform", + imageUrl: "/img/logos/companies/typeform.svg", + size: "small", + }, + { + name: "Peloton", + imageUrl: "/img/logos/companies/peloton.png", + size: "large", + }, + { + name: "Zynga", + imageUrl: "/img/logos/companies/zynga.png", + size: "default", + }, + ], + }, + { + name: "Financial & Fintech", + companies: [ + { + name: "Saxo Bank", + imageUrl: "/img/logos/companies/saxobank.svg", + size: "default", + }, + { + name: "Klarna", + imageUrl: "/img/logos/companies/klarna.svg", + size: "small", + }, + { + name: "BankSalad", + imageUrl: "/img/logos/companies/banksalad.png", + size: "large", + }, + { + name: "Uphold", + imageUrl: "/img/logos/companies/uphold.png", + size: "large", + }, + { + name: "Stash", + imageUrl: "/img/logos/companies/stash.svg", + size: "large", + }, + ], + }, + { + name: "E-Commerce", + companies: [ + { + name: "Grofers", + imageUrl: "/img/logos/companies/grofers.png", + size: "default", + }, + { + name: "SpotHero", + imageUrl: "/img/logos/companies/spothero.png", + size: "default", + }, + { + name: "hipages", 
+ imageUrl: "/img/logos/companies/hipages.png", + size: "default", + }, + { + name: "Wolt", + imageUrl: "/img/logos/companies/wolt.png", + size: "large", + }, + ], + }, + { + name: "And More", + companies: [ + { + name: "Viasat", + imageUrl: "/img/logos/companies/viasat.png", + size: "large", + }, + { + name: "DFDS", + imageUrl: "/img/logos/companies/dfds.png", + size: "large", + }, + { + name: "Moloco", + imageUrl: "/img/logos/companies/moloco.png", + size: "default", + }, + { + name: "Optum", + imageUrl: "/img/logos/companies/optum.jpg", + size: "large", + }, + ], }, ]; @@ -197,36 +219,33 @@ const platformLogos = [ ]; export const PlatformLogos = () => ( - +
{[...platformLogos, ...platformLogos].map((logo, idx) => ( - {logo.name} + {logo.name} ))}
); export const CompanyLogos = () => ( -
-
- {[...companyLogos, ...companyLogos].map((logo, idx) => ( - {logo.name} +
+ + {companiesByIndustry.map((industry, idx) => ( + +
+ {industry.companies.map((company, idx) => ( + {company.name} + ))} +
+
))} -
+
); diff --git a/docs-website/src/components/Section.js b/docs-website/src/components/Section.js index 2413cd3d66048..625c41172d4ae 100644 --- a/docs-website/src/components/Section.js +++ b/docs-website/src/components/Section.js @@ -3,12 +3,12 @@ import clsx from "clsx"; import styles from "../styles/section.module.scss"; const Section = ({ title, children, withBackground }) => ( -
+

-

{title}

+
+

{title}

+
{children}
diff --git a/docs-website/src/styles/global.scss b/docs-website/src/styles/global.scss index 36175227a1980..20b0a2ed95729 100644 --- a/docs-website/src/styles/global.scss +++ b/docs-website/src/styles/global.scss @@ -157,3 +157,7 @@ div[class^="announcementBarContent"] { --ifm-footer-background-color: #000000; } } + +.button { + white-space: initial; +} diff --git a/docs-website/src/styles/logos.module.scss b/docs-website/src/styles/logos.module.scss index 85faaf4e5924c..7d4f63e6e4816 100644 --- a/docs-website/src/styles/logos.module.scss +++ b/docs-website/src/styles/logos.module.scss @@ -36,17 +36,53 @@ .companyWrapper { background: #fff; + display: flex; + flex-wrap: wrap; + align-items: center; + justify-content: center; + html[data-theme="dark"] & { filter: invert(1); mix-blend-mode: exclusion; } } +.companyLogoContainer { + display: flex; + align-items: center; + justify-content: center; + > div { + display: flex; + flex-direction: column; + align-items: center; + ul[role="tablist"] { + padding: 0 1rem; + overflow-x: auto; + max-width: calc(100vw - 1rem); + &::-webkit-scrollbar { + display: none; + } + li[role="tab"] { + border-radius: 100em; + padding: 0.5em 1em; + margin: 0 0.25em; + border: none; + white-space: nowrap; + &[aria-selected="true"] { + border: none; + background-color: var(--ifm-hover-overlay); + } + } + } + } +} + .companyLogo { width: auto; mix-blend-mode: luminosity; opacity: 0.66; height: 60px; + margin: 2.5rem; } .default { diff --git a/docs-website/src/styles/section.module.scss b/docs-website/src/styles/section.module.scss index 2ca0c5d3c6a42..314a573bf6b98 100644 --- a/docs-website/src/styles/section.module.scss +++ b/docs-website/src/styles/section.module.scss @@ -11,6 +11,9 @@ padding: 2rem 0; margin-top: calc(-1.5rem - 4px); border-radius: 4px; + margin-left: auto; + margin-right: auto; + text-align: center; } .withBackground { diff --git a/docs/how/search.md b/docs/how/search.md new file mode 100644 index 
0000000000000..b3b8649c3cafc --- /dev/null +++ b/docs/how/search.md @@ -0,0 +1,125 @@ +# How to Search for Information in Datahub + +## Introduction + +The search bar is one of the means of finding data in Datahub. In this document, we discuss more effective ways of finding information beyond doing a standard keyword search. This is because keyword searches can return results from almost any part of an entity. + +### Search in Specific Fields: + +The following examples are in the format of +X: *typical question* : +```what to key in search bar```. [sample url](https://example.com) +Wildcard characters can be added to the search terms as well. These examples are non exhaustive and using Datasets as a reference. + +I want to: +1. *Find a dataset with the word **mask** in the name* : +```name: *mask*``` [Sample results](https://demo.datahubproject.io/search?page=1&query=name%3A%20%2Amask%2A) +This will return entities with **mask** in the name. +Names tends to be connected by other symbols, hence the wildcard symbols before and after the word. + +2. *Find a dataset with a property, **encoding*** +```customProperties: encoding*``` [Sample results](https://demo.datahubproject.io/search?page=1&query=customProperties%3A%20encoding%2A) +Dataset Properties are indexed in ElasticSearch the manner of key=value. Hence if you know the precise key-value pair, you can search using ```key=value```. However, if you only know the key, you can use wildcards to replace the value and that is what is being done here. + +3. *Find a dataset with a column name, **latitude*** +```fieldPaths: latitude``` [Sample results](https://demo.datahubproject.io/search?page=1&query=fieldPaths%3A%20latitude) +fieldPaths is the name of the attribute that holds the column name in Datasets. + +4. 
*Find a dataset with the term **latitude** in the field description* +```editedFieldDescriptions: latitude OR fieldDescriptions: latitude``` [Sample results](https://demo.datahubproject.io/search?page=1&query=editedFieldDescriptions%3A%20latitude%20OR%20fieldDescriptions%3A%20latitude) +Datasets has 2 attributes that contains field description. fieldDescription comes from the SchemaMetadata aspect, while editedFieldDescriptions comes from the EditableSchemaMetadata aspect. EditableSchemaMetadata holds information that comes from UI edits, while SchemaMetadata holds data from ingestion of the dataset. + +5. *Find a dataset with the term **logical** in the dataset description* +```editedDescription: *logical* OR description: *logical*``` [Sample results](https://demo.datahubproject.io/search?page=1&query=editedDescription%3A%20%2Alogical%2A%20OR%20description%3A%20%2Alogical%2A) +Similar to field descriptions, dataset descriptions can be found in 2 aspects, hence the need to search 2 attributes. + +6. *Find a dataset which reside in one of the browsing folders, for instance, the **hive** folder* +```browsePaths: *hive*``` [Sample results](https://demo.datahubproject.io/search?page=1&query=browsePaths%3A%20%2Ahive%2A) +BrowsePath is stored as a complete string, for instance ```/datasets/prod/hive/SampleKafkaDataset```, hence the need for wildcards on both ends of the term to return a result. + +## Where to find more information? +The sample queries here are non exhaustive. [The link here](https://demo.datahubproject.io/tag/urn:li:tag:Searchable) shows the current list of indexed fields for each entity inside Datahub. Click on the fields inside each entity and see which field has the tag ```Searchable```. +However, it does not tell you the specific attribute name to use for specialized searches. One way to do so is to inspect the ElasticSearch indices, for example: +```curl http://localhost:9200/_cat/indices``` returns all the ES indices in the ElasticSearch container. 
+``` +yellow open chartindex_v2_1643510690325 bQO_RSiCSUiKJYsmJClsew 1 1 2 0 8.5kb 8.5kb +yellow open mlmodelgroupindex_v2_1643510678529 OjIy0wb7RyKqLz3uTENRHQ 1 1 0 0 208b 208b +yellow open dataprocessindex_v2_1643510676831 2w-IHpuiTUCs6e6gumpYHA 1 1 0 0 208b 208b +yellow open corpgroupindex_v2_1643510673894 O7myCFlqQWKNtgsldzBS6g 1 1 3 0 16.8kb 16.8kb +yellow open corpuserindex_v2_1643510672335 0rIe_uIQTjme5Wy61MFbaw 1 1 6 2 32.4kb 32.4kb +yellow open datasetindex_v2_1643510688970 bjBfUEswSoSqPi3BP4iqjw 1 1 15 0 29.2kb 29.2kb +yellow open dataflowindex_v2_1643510681607 N8CMlRFvQ42rnYMVDaQJ2g 1 1 1 0 10.2kb 10.2kb +yellow open dataset_datasetusagestatisticsaspect_v1_1643510694706 kdqvqMYLRWq1oZt1pcAsXQ 1 1 4 0 8.9kb 8.9kb +yellow open .ds-datahub_usage_event-000003 YMVcU8sHTFilUwyI4CWJJg 1 1 186 0 203.9kb 203.9kb +yellow open datajob_datahubingestioncheckpointaspect_v1 nTXJf7C1Q3GoaIJ71gONxw 1 1 0 0 208b 208b +yellow open .ds-datahub_usage_event-000004 XRFwisRPSJuSr6UVmmsCsg 1 1 196 0 165.5kb 165.5kb +yellow open .ds-datahub_usage_event-000005 d0O6l5wIRLOyG6iIfAISGw 1 1 77 0 108.1kb 108.1kb +yellow open dataplatformindex_v2_1643510671426 _4SIIhfAT8yq_WROufunXA 1 1 0 0 208b 208b +yellow open mlmodeldeploymentindex_v2_1643510670629 n81eJIypSp2Qx-fpjZHgRw 1 1 0 0 208b 208b +yellow open .ds-datahub_usage_event-000006 oyrWKndjQ-a8Rt1IMD9aSA 1 1 143 0 127.1kb 127.1kb +yellow open mlfeaturetableindex_v2_1643510677164 iEXPt637S1OcilXpxPNYHw 1 1 5 0 8.9kb 8.9kb +yellow open .ds-datahub_usage_event-000001 S9EnGj64TEW8O3sLUb9I2Q 1 1 257 0 163.9kb 163.9kb +yellow open .ds-datahub_usage_event-000002 2xJyvKG_RYGwJOG9yq8pJw 1 1 44 0 155.4kb 155.4kb +yellow open dataset_datasetprofileaspect_v1_1643510693373 uahwTHGRRAC7w1c2VqVy8g 1 1 31 0 18.9kb 18.9kb +yellow open mlprimarykeyindex_v2_1643510687579 MUcmT8ASSASzEpLL98vrWg 1 1 7 0 9.5kb 9.5kb +yellow open glossarytermindex_v2_1643510686127 cQL8Pg6uQeKfMly9GPhgFQ 1 1 3 0 10kb 10kb +yellow open 
datajob_datahubingestionrunsummaryaspect_v1 rk22mIsDQ02-52MpWLm1DA 1 1 0 0 208b 208b +yellow open mlmodelindex_v2_1643510675399 gk-WSTVjRZmkDU5ggeFSqg 1 1 1 0 10.3kb 10.3kb +yellow open dashboardindex_v2_1643510691686 PQjSaGhTRqWW6zYjcqXo6Q 1 1 1 0 8.7kb 8.7kb +yellow open datahubpolicyindex_v2_1643510671774 ZyTrYx3-Q1e-7dYq1kn5Gg 1 1 0 0 208b 208b +yellow open datajobindex_v2_1643510682977 K-rbEyjBS6ew5uOQQS4sPw 1 1 2 0 11.3kb 11.3kb +yellow open datahubretentionindex_v2 8XrQTPwRTX278mx1SrNwZA 1 1 0 0 208b 208b +yellow open glossarynodeindex_v2_1643510678826 Y3_bCz0YR2KPwCrrVngDdA 1 1 1 0 7.4kb 7.4kb +yellow open system_metadata_service_v1 36spEDbDTdKgVlSjE8t-Jw 1 1 387 8 63.2kb 63.2kb +yellow open schemafieldindex_v2_1643510684410 tZ1gC3haTReRLmpCxirVxQ 1 1 0 0 208b 208b +yellow open mlfeatureindex_v2_1643510680246 aQO5HF0mT62Znn-oIWBC8A 1 1 20 0 17.4kb 17.4kb +yellow open tagindex_v2_1643510684785 PfnUdCUORY2fnF3I3W7HwA 1 1 3 1 18.6kb 18.6kb +``` +The index name will vary from instance to instance. 
Indexed information about Datasets can be found in: +```curl http://localhost:9200/datasetindex_v2_1643510688970/_search?=pretty``` + +example information of a dataset: +``` +{ + "_index" : "datasetindex_v2_1643510688970", + "_type" : "_doc", + "_id" : "urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Akafka%2CSampleKafkaDataset%2CPROD%29", + "_score" : 1.0, + "_source" : { + "urn" : "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)", + "name" : "SampleKafkaDataset", + "browsePaths" : [ + "/prod/kafka/SampleKafkaDataset" + ], + "origin" : "PROD", + "customProperties" : [ + "prop2=pikachu", + "prop1=fakeprop" + ], + "hasDescription" : false, + "hasOwners" : true, + "owners" : [ + "urn:li:corpuser:jdoe", + "urn:li:corpuser:datahub" + ], + "fieldPaths" : [ + "[version=2.0].[type=boolean].field_foo_2", + "[version=2.0].[type=boolean].field_bar", + "[version=2.0].[key=True].[type=int].id" + ], + "fieldGlossaryTerms" : [ ], + "fieldDescriptions" : [ + "Foo field description", + "Bar field description", + "Id specifying which partition the message should go to" + ], + "fieldTags" : [ + "urn:li:tag:NeedsDocumentation" + ], + "platform" : "urn:li:dataPlatform:kafka" + } + }, +``` + + + diff --git a/docs/modeling/metadata-model.md b/docs/modeling/metadata-model.md index c5593fd094db3..62e83d36f729f 100644 --- a/docs/modeling/metadata-model.md +++ b/docs/modeling/metadata-model.md @@ -36,12 +36,14 @@ Here is an example graph consisting of 3 types of entity (CorpUser, Chart, Dashb DataHub's "core" Entity types model the Data Assets that comprise the Modern Data Stack. They include -1. **Data Platform**: A type of Data "Platform". That is, an external system that is involved in processing, storing, or visualizing Data Assets. Examples include MySQL, Snowflake, Redshift, and S3. -2. **Dataset**: A collection of data. Tables, Views, Streams, Document Collections, and Files are all modeled as "Datasets" on DataHub. 
Datasets can have tags, owners, links, glossary terms, and descriptions attached to them. They can also have specific sub-types, such as "View", "Collection", "Stream", "Explore", and more. Examples include Postgres Tables, MongoDB Collections, or S3 files. -3. **Chart**: A single data vizualization derived from a Dataset. A single Chart can be a part of multiple Dashboards. Charts can have tags, owners, links, glossary terms, and descriptions attached to them. Examples include a Superset or Looker Chart. -4. **Dashboard**: A collection of Charts for visualization. Dashboards can have tags, owners, links, glossary terms, and descriptions attached to them. Examples include a Superset or Mode Dashboard. -5. **Data Job** (Task): An executable job that processes data assets, where "processing" implies consuming data, producing data, or both. Data Jobs can have tags, owners, links, glossary terms, and descriptions attached to them. They must belong to a single Data Flow. Examples include an Airflow Task. -6. **Data Flow** (Pipeline): An executable collection of Data Jobs with dependencies among them, or a DAG. Data Jobs can have tags, owners, links, glossary terms, and descriptions attached to them. Examples include an Airflow DAG. +1. **[Data Platform](docs/generated/metamodel/entities/dataPlatform.md)**: A type of Data "Platform". That is, an external system that is involved in processing, storing, or visualizing Data Assets. Examples include MySQL, Snowflake, Redshift, and S3. +2. **[Dataset](docs/generated/metamodel/entities/dataset.md)**: A collection of data. Tables, Views, Streams, Document Collections, and Files are all modeled as "Datasets" on DataHub. Datasets can have tags, owners, links, glossary terms, and descriptions attached to them. They can also have specific sub-types, such as "View", "Collection", "Stream", "Explore", and more. Examples include Postgres Tables, MongoDB Collections, or S3 files. +3. 
**[Chart](docs/generated/metamodel/entities/chart.md)**: A single data vizualization derived from a Dataset. A single Chart can be a part of multiple Dashboards. Charts can have tags, owners, links, glossary terms, and descriptions attached to them. Examples include a Superset or Looker Chart. +4. **[Dashboard](docs/generated/metamodel/entities/dashboard.md)**: A collection of Charts for visualization. Dashboards can have tags, owners, links, glossary terms, and descriptions attached to them. Examples include a Superset or Mode Dashboard. +5. **[Data Job](docs/generated/metamodel/entities/dataJob.md)** (Task): An executable job that processes data assets, where "processing" implies consuming data, producing data, or both. Data Jobs can have tags, owners, links, glossary terms, and descriptions attached to them. They must belong to a single Data Flow. Examples include an Airflow Task. +6. **[Data Flow](docs/generated/metamodel/entities/dataFlow.md)** (Pipeline): An executable collection of Data Jobs with dependencies among them, or a DAG. Data Jobs can have tags, owners, links, glossary terms, and descriptions attached to them. Examples include an Airflow DAG. + +See the **Metadata Modeling/Entities** section on the left to explore the entire model. ## The Entity Registry @@ -73,29 +75,20 @@ to the YAML configuration, instead of creating new Snapshot / Aspect files. To explore the current DataHub metadata model, you can inspect this high-level picture that shows the different entities and edges between them showing the relationships between them. ![Metadata Model Graph](../imgs/datahub-metadata-model.png) -To navigate the aspect model for specific entities and explore relationships using the `foreign-key` concept, you can view them in our demo environment. 
+To navigate the aspect model for specific entities and explore relationships using the `foreign-key` concept, you can view them in our demo environment or navigate the auto-generated docs in the **Metadata Modeling/Entities** section on the left. For example, here are helpful links to the most popular entities in DataHub's metadata model: -* Dataset: [Profile](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Dataset,PROD)/Schema?is_lineage_mode=false) [Documentation](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Dataset,PROD)/Documentation?is_lineage_mode=false) -* Dashboard: [Profile](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Dashboard,PROD)/Schema?is_lineage_mode=false) [Documentation](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Dashboard,PROD)/Documentation?is_lineage_mode=false) -* User (a.k.a CorpUser): [Profile](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Corpuser,PROD)/Schema?is_lineage_mode=false) [Documentation](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Corpuser,PROD)/Documentation?is_lineage_mode=false) -* Pipeline (a.k.a DataFlow): [Profile](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,DataFlow,PROD)/Schema?is_lineage_mode=false) [Documentation](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,DataFlow,PROD)/Documentation?is_lineage_mode=false) -* Feature Table (a.k.a. 
MLFeatureTable): [Profile](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,MlFeatureTable,PROD)/Schema?is_lineage_mode=false) [Documentation](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,MlFeatureTable,PROD)/Documentation?is_lineage_mode=false) -* For the full list of entities in the metadata model, browse them [here](https://demo.datahubproject.io/browse/dataset/prod/datahub/entities) - -During metadata ingestion, these entities are represented using [metadata events](../what/mxe.md). +* [Dataset](docs/generated/metamodel/entities/dataset.md): [Profile](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Dataset,PROD)/Schema?is_lineage_mode=false) [Documentation](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Dataset,PROD)/Documentation?is_lineage_mode=false) +* [Dashboard](docs/generated/metamodel/entities/dashboard.md): [Profile](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Dashboard,PROD)/Schema?is_lineage_mode=false) [Documentation](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Dashboard,PROD)/Documentation?is_lineage_mode=false) +* [User (a.k.a CorpUser)](docs/generated/metamodel/entities/corpuser.md): [Profile](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Corpuser,PROD)/Schema?is_lineage_mode=false) [Documentation](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,Corpuser,PROD)/Documentation?is_lineage_mode=false) +* [Pipeline (a.k.a DataFlow)](docs/generated/metamodel/entities/dataFlow.md): [Profile](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,DataFlow,PROD)/Schema?is_lineage_mode=false) [Documentation](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,DataFlow,PROD)/Documentation?is_lineage_mode=false) 
+* [Feature Table (a.k.a. MLFeatureTable)](docs/generated/metamodel/entities/mlFeatureTable.md): [Profile](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,MlFeatureTable,PROD)/Schema?is_lineage_mode=false) [Documentation](https://demo.datahubproject.io/dataset/urn:li:dataset:(urn:li:dataPlatform:datahub,MlFeatureTable,PROD)/Documentation?is_lineage_mode=false) +* For the full list of entities in the metadata model, browse them [here](https://demo.datahubproject.io/browse/dataset/prod/datahub/entities) or use the **Metadata Modeling/Entities** section on the left. ### Generating documentation for the Metadata Model -The metadata model documentation can be generated and uploaded into a running DataHub instance using the following command below. - -```console -./gradlew :metadata-ingestion:modelDocUpload -``` - -**_NOTE_**: This will upload the model documentation to the DataHub instance running at the environment variable `$DATAHUB_SERVER` (http://localhost:8080 by default) - -It will also generate a few files under `metadata-ingestion/generated/docs` such as a dot file called `metadata_graph.dot` that you can use to visualize the relationships among the entities. +- This website: Metadata model documentation for this website is generated using `./gradlew :docs-website:yarnBuild`, which delegates the model doc generation to the `modelDocGen` task in the `metadata-ingestion` module. +- Uploading documentation to a running DataHub Instance: The metadata model documentation can be generated and uploaded into a running DataHub instance using the command `./gradlew :metadata-ingestion:modelDocUpload`. 
**_NOTE_**: This will upload the model documentation to the DataHub instance running at the environment variable `$DATAHUB_SERVER` (http://localhost:8080 by default) ## Querying the Metadata Graph diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/Entity.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/Entity.java index 075098c80d79b..c446f63b65321 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/Entity.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/Entity.java @@ -5,11 +5,14 @@ import lombok.AllArgsConstructor; import lombok.NoArgsConstructor; import lombok.Value; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + @Value @NoArgsConstructor(force = true, access = AccessLevel.PRIVATE) @AllArgsConstructor +@JsonIgnoreProperties(ignoreUnknown = true) public class Entity { String name; String doc; diff --git a/entity-registry/src/test/resources/test-entity-registry.yml b/entity-registry/src/test/resources/test-entity-registry.yml index 7730fe93d1b5d..4e39d103235a8 100644 --- a/entity-registry/src/test/resources/test-entity-registry.yml +++ b/entity-registry/src/test/resources/test-entity-registry.yml @@ -2,6 +2,7 @@ id: test-registry entities: - name: dataset keyAspect: datasetKey + category: core aspects: - datasetProperties - schemaMetadata diff --git a/metadata-events/mxe-schemas/rename-namespace.sh b/metadata-events/mxe-schemas/rename-namespace.sh index 2cc8cb0d23c54..6402e09b65c07 100755 --- a/metadata-events/mxe-schemas/rename-namespace.sh +++ b/metadata-events/mxe-schemas/rename-namespace.sh @@ -1,6 +1,6 @@ #!/bin/sh -SCRIPT_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" +SCRIPT_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]:-$0}" )" >/dev/null && pwd )" # Rename all com.linkedin.* to com.linkedin.pegasus2avro.*, except for com.linkedin.avro2pegasus.* find 
$SCRIPT_ROOT/../mxe-schemas/src/renamed -type f -print0 | \ @@ -8,4 +8,4 @@ xargs -0 perl -pi -e 's/com\.linkedin\.(?!avro2pegasus)/com\.linkedin\.pegasus2a # Rename com.linkedin.avro2pegasus.* to com.linkedin.* find $SCRIPT_ROOT/../mxe-schemas/src/renamed -type f -print0 | \ -xargs -0 perl -pi -e 's/com\.linkedin\.avro2pegasus\./com\.linkedin\./g' \ No newline at end of file +xargs -0 perl -pi -e 's/com\.linkedin\.avro2pegasus\./com\.linkedin\./g' diff --git a/metadata-ingestion/build.gradle b/metadata-ingestion/build.gradle index 576a59e92b2c3..77f46f338f690 100644 --- a/metadata-ingestion/build.gradle +++ b/metadata-ingestion/build.gradle @@ -43,10 +43,15 @@ task installDev(type: Exec, dependsOn: [install]) { "${venv_name}/bin/pip install -e .[dev] && touch ${venv_name}/.build_install_dev_sentinel" } -task modelDocGen(type: Exec, dependsOn: [codegen, installDev]) { - inputs.files(project.fileTree(dir: "../metadata-events/mxe-schemas/src/", include: "**/*.avsc")) - outputs.dir('generated/docs') - commandLine 'bash', '-c', "source ${venv_name}/bin/activate && ./scripts/modeldocgen.sh" +task modelDocGen(type: Exec, dependsOn: [codegen]) { + inputs.files( + file('scripts/modeldocgen.py'), + project.fileTree(dir: "../metadata-models/docs/entities/", include: "**/*.md"), + project.fileTree(dir: "examples/", include: "**/*.py"), + project.fileTree(dir: "../metadata-events/mxe-schemas/src/", include: "**/*.avsc") + ) + outputs.dir('../docs/generated') + commandLine 'bash', '-c', "source ${venv_name}/bin/activate && ./scripts/modeldocgen.sh" } task modelDocUpload(type: Exec, dependsOn: [modelDocGen]) { @@ -127,5 +132,6 @@ clean { delete 'build' delete 'dist' delete 'src/datahub/metadata' + delete '../docs/generated' } clean.dependsOn cleanPythonCache diff --git a/metadata-ingestion/examples/library/data_quality_mcpw_rest.py b/metadata-ingestion/examples/library/data_quality_mcpw_rest.py index b171aa41b045a..ab52c65267795 100644 --- 
a/metadata-ingestion/examples/library/data_quality_mcpw_rest.py +++ b/metadata-ingestion/examples/library/data_quality_mcpw_rest.py @@ -30,9 +30,7 @@ def assertionUrn(info: AssertionInfo) -> str: return builder.make_assertion_urn(assertionId) -def emitAssertionResult( - assertionResult: AssertionResult, datasetUrn: str -) -> None: +def emitAssertionResult(assertionResult: AssertionResult, datasetUrn: str) -> None: dataset_assertionResult_mcp = MetadataChangeProposalWrapper( entityType="dataset", diff --git a/metadata-ingestion/examples/library/dataset_add_column_tag.py b/metadata-ingestion/examples/library/dataset_add_column_tag.py new file mode 100644 index 0000000000000..934ead98b7c7a --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_add_column_tag.py @@ -0,0 +1,110 @@ +import logging +import time + +from datahub.emitter.mce_builder import make_dataset_urn, make_tag_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper + +# read-modify-write requires access to the DataHubGraph (RestEmitter is not enough) +from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph + +# Imports for metadata model classes +from datahub.metadata.schema_classes import ( + AuditStampClass, + ChangeTypeClass, + EditableSchemaFieldInfoClass, + EditableSchemaMetadataClass, + GlobalTagsClass, + TagAssociationClass, +) + +log = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +def get_simple_field_path_from_v2_field_path(field_path: str) -> str: + """A helper function to extract simple . 
path notation from the v2 field path""" + if field_path.startswith("[version=2.0]"): + # this is a v2 field path + tokens = [ + t + for t in field_path.split(".") + if not (t.startswith("[") or t.endswith("]")) + ] + path = ".".join(tokens) + return path + else: + # not a v2, we assume this is a simple path + return field_path + + +# Inputs -> the column, dataset and the tag to set +column = "address.zipcode" +dataset_urn = make_dataset_urn(platform="hive", name="realestate_db.sales", env="PROD") +tag_to_add = make_tag_urn("location") + + +# First we get the current editable schema metadata +gms_endpoint = "http://localhost:8080" +graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint)) + + +current_editable_schema_metadata = graph.get_aspect( + entity_urn=dataset_urn, + aspect="editableSchemaMetadata", + aspect_type=EditableSchemaMetadataClass, +) + + +# Some pre-built objects to help all the conditional pathways +tag_association_to_add = TagAssociationClass(tag=tag_to_add) +tags_aspect_to_set = GlobalTagsClass(tags=[tag_association_to_add]) +field_info_to_set = EditableSchemaFieldInfoClass( + fieldPath=column, globalTags=tags_aspect_to_set +) + + +need_write = False +field_match = False +if current_editable_schema_metadata: + for fieldInfo in current_editable_schema_metadata.editableSchemaFieldInfo: + if get_simple_field_path_from_v2_field_path(fieldInfo.fieldPath) == column: + # we have some editable schema metadata for this field + field_match = True + if fieldInfo.globalTags: + if tag_to_add not in [x.tag for x in fieldInfo.globalTags.tags]: + # this tag is not present + fieldInfo.globalTags.tags.append(tag_association_to_add) + need_write = True + else: + fieldInfo.globalTags = tags_aspect_to_set + need_write = True + + if not field_match: + # this field isn't present in the editable schema metadata aspect, add it + field_info = field_info_to_set + current_editable_schema_metadata.editableSchemaFieldInfo.append(field_info) + need_write = True + +else: + 
# create a brand new editable schema metadata aspect + now = int(time.time() * 1000) # milliseconds since epoch + current_timestamp = AuditStampClass(time=now, actor="urn:li:corpuser:ingestion") + current_editable_schema_metadata = EditableSchemaMetadataClass( + editableSchemaFieldInfo=[field_info_to_set], + created=current_timestamp, + ) + need_write = True + +if need_write: + event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper( + entityType="dataset", + changeType=ChangeTypeClass.UPSERT, + entityUrn=dataset_urn, + aspectName="editableSchemaMetadata", + aspect=current_editable_schema_metadata, + ) + graph.emit(event) + log.info(f"Tag {tag_to_add} added to column {column} of dataset {dataset_urn}") + +else: + log.info(f"Tag {tag_to_add} already attached to column {column}, omitting write") diff --git a/metadata-ingestion/examples/library/dataset_add_column_term.py b/metadata-ingestion/examples/library/dataset_add_column_term.py new file mode 100644 index 0000000000000..c39c38bf51f1b --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_add_column_term.py @@ -0,0 +1,112 @@ +import logging +import time + +from datahub.emitter.mce_builder import make_dataset_urn, make_term_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper + +# read-modify-write requires access to the DataHubGraph (RestEmitter is not enough) +from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph + +# Imports for metadata model classes +from datahub.metadata.schema_classes import ( + AuditStampClass, + ChangeTypeClass, + EditableSchemaFieldInfoClass, + EditableSchemaMetadataClass, + GlossaryTermAssociationClass, + GlossaryTermsClass, +) + +log = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +def get_simple_field_path_from_v2_field_path(field_path: str) -> str: + """A helper function to extract simple . 
path notation from the v2 field path""" + if field_path.startswith("[version=2.0]"): + # this is a v2 field path + tokens = [ + t + for t in field_path.split(".") + if not (t.startswith("[") or t.endswith("]")) + ] + path = ".".join(tokens) + return path + else: + # not a v2, we assume this is a simple path + return field_path + + +# Inputs -> the column, dataset and the term to set +column = "address.zipcode" +dataset_urn = make_dataset_urn(platform="hive", name="realestate_db.sales", env="PROD") +term_to_add = make_term_urn("Classification.Location") + + +# First we get the current editable schema metadata +gms_endpoint = "http://localhost:8080" +graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint)) + + +current_editable_schema_metadata = graph.get_aspect( + entity_urn=dataset_urn, + aspect="editableSchemaMetadata", + aspect_type=EditableSchemaMetadataClass, +) + + +# Some pre-built objects to help all the conditional pathways +now = int(time.time() * 1000) # milliseconds since epoch +current_timestamp = AuditStampClass(time=now, actor="urn:li:corpuser:ingestion") + +term_association_to_add = GlossaryTermAssociationClass(urn=term_to_add) +term_aspect_to_set = GlossaryTermsClass( + terms=[term_association_to_add], auditStamp=current_timestamp +) +field_info_to_set = EditableSchemaFieldInfoClass( + fieldPath=column, glossaryTerms=term_aspect_to_set +) + +need_write = False +field_match = False +if current_editable_schema_metadata: + for fieldInfo in current_editable_schema_metadata.editableSchemaFieldInfo: + if get_simple_field_path_from_v2_field_path(fieldInfo.fieldPath) == column: + # we have some editable schema metadata for this field + field_match = True + if fieldInfo.glossaryTerms: + if term_to_add not in [x.urn for x in fieldInfo.glossaryTerms.terms]: + # this tag is not present + fieldInfo.glossaryTerms.terms.append(term_association_to_add) + need_write = True + else: + fieldInfo.glossaryTerms = term_aspect_to_set + need_write = True + + if not 
field_match: + # this field isn't present in the editable schema metadata aspect, add it + field_info = field_info_to_set + current_editable_schema_metadata.editableSchemaFieldInfo.append(field_info) + need_write = True + +else: + # create a brand new editable schema metadata aspect + current_editable_schema_metadata = EditableSchemaMetadataClass( + editableSchemaFieldInfo=[field_info_to_set], + created=current_timestamp, + ) + need_write = True + +if need_write: + event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper( + entityType="dataset", + changeType=ChangeTypeClass.UPSERT, + entityUrn=dataset_urn, + aspectName="editableSchemaMetadata", + aspect=current_editable_schema_metadata, + ) + graph.emit(event) + log.info(f"Tag {term_to_add} added to column {column} of dataset {dataset_urn}") + +else: + log.info(f"Tag {term_to_add} already attached to column {column}, omitting write") diff --git a/metadata-ingestion/examples/library/dataset_add_documentation.py b/metadata-ingestion/examples/library/dataset_add_documentation.py new file mode 100644 index 0000000000000..c31c63163a69f --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_add_documentation.py @@ -0,0 +1,107 @@ +import logging +import time + +from datahub.emitter.mce_builder import make_dataset_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper + +# read-modify-write requires access to the DataHubGraph (RestEmitter is not enough) +from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph + +# Imports for metadata model classes +from datahub.metadata.schema_classes import ( + AuditStampClass, + ChangeTypeClass, + EditableDatasetPropertiesClass, + InstitutionalMemoryClass, + InstitutionalMemoryMetadataClass, +) + +log = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +# Inputs -> owner, ownership_type, dataset +documentation_to_add = "## The Real Estate Sales Dataset\nThis is a really important Dataset that contains all the 
relevant information about sales that have happened organized by address.\n" +link_to_add = "https://wikipedia.com/real_estate" +link_description = "This is the definition of what real estate means" +dataset_urn = make_dataset_urn(platform="hive", name="realestate_db.sales", env="PROD") + +# Some helpful variables to fill out objects later +now = int(time.time() * 1000) # milliseconds since epoch +current_timestamp = AuditStampClass(time=now, actor="urn:li:corpuser:ingestion") +institutional_memory_element = InstitutionalMemoryMetadataClass( + url=link_to_add, + description=link_description, + createStamp=current_timestamp, +) + + +# First we get the current owners +gms_endpoint = "http://localhost:8080" +graph = DataHubGraph(config=DatahubClientConfig(server=gms_endpoint)) + +current_editable_properties = graph.get_aspect( + entity_urn=dataset_urn, + aspect="editableDatasetProperties", + aspect_type=EditableDatasetPropertiesClass, +) + +need_write = False +if current_editable_properties: + if documentation_to_add != current_editable_properties.description: + current_editable_properties.description = documentation_to_add + need_write = True +else: + # create a brand new editable dataset properties aspect + current_editable_properties = EditableDatasetPropertiesClass( + created=current_timestamp, description=documentation_to_add + ) + need_write = True + +if need_write: + event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper( + entityType="dataset", + changeType=ChangeTypeClass.UPSERT, + entityUrn=dataset_urn, + aspectName="editableDatasetProperties", + aspect=current_editable_properties, + ) + graph.emit(event) + log.info(f"Documentation added to dataset {dataset_urn}") + +else: + log.info("Documentation already exists and is identical, omitting write") + + +current_institutional_memory = graph.get_aspect( + entity_urn=dataset_urn, + aspect="institutionalMemory", + aspect_type=InstitutionalMemoryClass, +) + +need_write = False + +if 
current_institutional_memory: + if link_to_add not in [x.url for x in current_institutional_memory.elements]: + current_institutional_memory.elements.append(institutional_memory_element) + need_write = True +else: + # create a brand new institutional memory aspect + current_institutional_memory = InstitutionalMemoryClass( + elements=[institutional_memory_element] + ) + need_write = True + +if need_write: + event = MetadataChangeProposalWrapper( + entityType="dataset", + changeType=ChangeTypeClass.UPSERT, + entityUrn=dataset_urn, + aspectName="institutionalMemory", + aspect=current_institutional_memory, + ) + graph.emit(event) + log.info(f"Link {link_to_add} added to dataset {dataset_urn}") + +else: + log.info(f"Link {link_to_add} already exists and is identical, omitting write") diff --git a/metadata-ingestion/examples/library/dataset_add_owner.py b/metadata-ingestion/examples/library/dataset_add_owner.py new file mode 100644 index 0000000000000..6dc01da9aad2d --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_add_owner.py @@ -0,0 +1,71 @@ +import logging +from typing import Optional + +from datahub.emitter.mce_builder import make_dataset_urn, make_user_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper + +# read-modify-write requires access to the DataHubGraph (RestEmitter is not enough) +from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph + +# Imports for metadata model classes +from datahub.metadata.schema_classes import ( + ChangeTypeClass, + OwnerClass, + OwnershipClass, + OwnershipTypeClass, +) + +log = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +# Inputs -> owner, ownership_type, dataset +owner_to_add = make_user_urn("jdoe") +ownership_type = OwnershipTypeClass.DATAOWNER +dataset_urn = make_dataset_urn(platform="hive", name="realestate_db.sales", env="PROD") + +# Some objects to help with conditional pathways later +owner_class_to_add = OwnerClass(owner=owner_to_add, 
type=ownership_type) +ownership_to_add = OwnershipClass(owners=[owner_class_to_add]) + + +# First we get the current owners +gms_endpoint = "http://localhost:8080" +graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint)) + + +current_owners: Optional[OwnershipClass] = graph.get_aspect( + entity_urn=dataset_urn, + aspect="ownership", + aspect_type=OwnershipClass, +) + + +need_write = False +if current_owners: + if (owner_to_add, ownership_type) not in [ + (x.owner, x.type) for x in current_owners.owners + ]: + # owners exist, but this owner is not present in the current owners + current_owners.owners.append(owner_class_to_add) + need_write = True +else: + # create a brand new ownership aspect + current_owners = ownership_to_add + need_write = True + +if need_write: + event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper( + entityType="dataset", + changeType=ChangeTypeClass.UPSERT, + entityUrn=dataset_urn, + aspectName="ownership", + aspect=current_owners, + ) + graph.emit(event) + log.info( + f"Owner {owner_to_add}, type {ownership_type} added to dataset {dataset_urn}" + ) + +else: + log.info(f"Owner {owner_to_add} already exists, omitting write") diff --git a/metadata-ingestion/examples/library/dataset_add_tag.py b/metadata-ingestion/examples/library/dataset_add_tag.py new file mode 100644 index 0000000000000..29503f90c4851 --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_add_tag.py @@ -0,0 +1,59 @@ +import logging +from typing import Optional + +from datahub.emitter.mce_builder import make_dataset_urn, make_tag_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper + +# read-modify-write requires access to the DataHubGraph (RestEmitter is not enough) +from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph + +# Imports for metadata model classes +from datahub.metadata.schema_classes import ( + ChangeTypeClass, + GlobalTagsClass, + TagAssociationClass, +) + +log = logging.getLogger(__name__) 
+logging.basicConfig(level=logging.INFO) + + +# First we get the current tags +gms_endpoint = "http://localhost:8080" +graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint)) + +dataset_urn = make_dataset_urn(platform="hive", name="realestate_db.sales", env="PROD") + +current_tags: Optional[GlobalTagsClass] = graph.get_aspect( + entity_urn=dataset_urn, + aspect="globalTags", + aspect_type=GlobalTagsClass, +) + +tag_to_add = make_tag_urn("purchase") +tag_association_to_add = TagAssociationClass(tag=tag_to_add) + +need_write = False +if current_tags: + if tag_to_add not in [x.tag for x in current_tags.tags]: + # tags exist, but this tag is not present in the current tags + current_tags.tags.append(TagAssociationClass(tag_to_add)) + need_write = True +else: + # create a brand new tags aspect + current_tags = GlobalTagsClass(tags=[tag_association_to_add]) + need_write = True + +if need_write: + event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper( + entityType="dataset", + changeType=ChangeTypeClass.UPSERT, + entityUrn=dataset_urn, + aspectName="globalTags", + aspect=current_tags, + ) + graph.emit(event) + log.info(f"Tag {tag_to_add} added to dataset {dataset_urn}") + +else: + log.info(f"Tag {tag_to_add} already exists, omitting write") diff --git a/metadata-ingestion/examples/library/dataset_add_term.py b/metadata-ingestion/examples/library/dataset_add_term.py new file mode 100644 index 0000000000000..3976359d6d1d4 --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_add_term.py @@ -0,0 +1,63 @@ +import logging +from typing import Optional + +from datahub.emitter.mce_builder import make_dataset_urn, make_term_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper + +# read-modify-write requires access to the DataHubGraph (RestEmitter is not enough) +from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph + +# Imports for metadata model classes +from datahub.metadata.schema_classes import ( + 
AuditStampClass, + ChangeTypeClass, + GlossaryTermAssociationClass, + GlossaryTermsClass, +) + +log = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +# First we get the current terms +gms_endpoint = "http://localhost:8080" +graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint)) + +dataset_urn = make_dataset_urn(platform="hive", name="realestate_db.sales", env="PROD") + +current_terms: Optional[GlossaryTermsClass] = graph.get_aspect( + entity_urn=dataset_urn, + aspect="glossaryTerms", + aspect_type=GlossaryTermsClass, +) + +term_to_add = make_term_urn("Classification.HighlyConfidential") +term_association_to_add = GlossaryTermAssociationClass(urn=term_to_add) +# an audit stamp that basically says we have no idea when these terms were added to this dataset +# change the time value to (time.time() * 1000) if you want to specify the current time of running this code as the time +unknown_audit_stamp = AuditStampClass(time=0, actor="urn:li:corpuser:ingestion") +need_write = False +if current_terms: + if term_to_add not in [x.urn for x in current_terms.terms]: + # terms exist, but this term is not present in the current terms + current_terms.terms.append(term_association_to_add) + need_write = True +else: + # create a brand new terms aspect + current_terms = GlossaryTermsClass( + terms=[term_association_to_add], + auditStamp=unknown_audit_stamp, + ) + need_write = True + +if need_write: + event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper( + entityType="dataset", + changeType=ChangeTypeClass.UPSERT, + entityUrn=dataset_urn, + aspectName="glossaryTerms", + aspect=current_terms, + ) + graph.emit(event) +else: + log.info(f"Term {term_to_add} already exists, omitting write") diff --git a/metadata-ingestion/examples/library/dataset_schema.py b/metadata-ingestion/examples/library/dataset_schema.py new file mode 100644 index 0000000000000..79c2706c04611 --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_schema.py 
@@ -0,0 +1,53 @@ +# Imports for urn construction utility methods +from datahub.emitter.mce_builder import make_data_platform_urn, make_dataset_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.rest_emitter import DatahubRestEmitter + +# Imports for metadata model classes +from datahub.metadata.schema_classes import ( + ChangeTypeClass, + DateTypeClass, + OtherSchemaClass, + SchemaFieldClass, + SchemaFieldDataTypeClass, + SchemaMetadataClass, + StringTypeClass, +) + +event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper( + entityType="dataset", + changeType=ChangeTypeClass.UPSERT, + entityUrn=make_dataset_urn(platform="hive", name="realestate_db.sales", env="PROD"), + aspectName="schemaMetadata", + aspect=SchemaMetadataClass( + schemaName="customer", # not used + platform=make_data_platform_urn("hive"), # important <- platform must be an urn + version=0, # when the source system has a notion of versioning of schemas, insert this in, otherwise leave as 0 + hash="", # when the source system has a notion of unique schemas identified via hash, include a hash, else leave it as empty string + platformSchema=OtherSchemaClass(rawSchema="__insert raw schema here__"), + fields=[ + SchemaFieldClass( + fieldPath="address.zipcode", + type=SchemaFieldDataTypeClass(type=StringTypeClass()), + nativeDataType="VARCHAR(50)", # use this to provide the type of the field in the source system's vernacular + description="This is the zipcode of the address. 
Specified using extended form and limited to addresses in the United States", + ), + SchemaFieldClass( + fieldPath="address.street", + type=SchemaFieldDataTypeClass(type=StringTypeClass()), + nativeDataType="VARCHAR(100)", + description="Street corresponding to the address", + ), + SchemaFieldClass( + fieldPath="last_sold_date", + type=SchemaFieldDataTypeClass(type=DateTypeClass()), + nativeDataType="Date", + description="Date of the last sale date for this property", + ), + ], + ), +) + +# Create rest emitter +rest_emitter = DatahubRestEmitter(gms_server="http://localhost:8080") +rest_emitter.emit(event) diff --git a/metadata-ingestion/examples/library/dataset_schema_with_tags_terms.py b/metadata-ingestion/examples/library/dataset_schema_with_tags_terms.py new file mode 100644 index 0000000000000..cb8f930cc879f --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_schema_with_tags_terms.py @@ -0,0 +1,72 @@ +# Imports for urn construction utility methods + +from datahub.emitter.mce_builder import ( + make_data_platform_urn, + make_dataset_urn, + make_tag_urn, + make_term_urn, +) +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.rest_emitter import DatahubRestEmitter + +# Imports for metadata model classes +from datahub.metadata.schema_classes import ( + AuditStampClass, + ChangeTypeClass, + GlobalTagsClass, + GlossaryTermAssociationClass, + GlossaryTermsClass, + OtherSchemaClass, + SchemaFieldClass, + SchemaFieldDataTypeClass, + SchemaMetadataClass, + StringTypeClass, + TagAssociationClass, +) + +event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper( + entityType="dataset", + changeType=ChangeTypeClass.UPSERT, + entityUrn=make_dataset_urn(platform="hive", name="foodb.barTable", env="PROD"), + aspectName="schemaMetadata", + aspect=SchemaMetadataClass( + schemaName="customer", # not used + platform=make_data_platform_urn("hive"), # important <- platform must be an urn + version=0, # when the source system 
has a notion of versioning of schemas, insert this in, otherwise leave as 0 + hash="", # when the source system has a notion of unique schemas identified via hash, include a hash, else leave it as empty string + platformSchema=OtherSchemaClass(rawSchema="__insert raw schema here__"), + fields=[ + SchemaFieldClass( + fieldPath="address.zipcode", + type=SchemaFieldDataTypeClass(type=StringTypeClass()), + nativeDataType="VARCHAR(100)", # use this to provide the type of the field in the source system's vernacular + jsonPath="", # Unused field, can omit + nullable=True, + description="This is the zipcode of the address. Specified using extended form and limited to addresses in the United States", + recursive=False, # Unused field, can omit + # It is rare to attach tags to fields as part of the technical schema unless you are purely reflecting state that exists in the source system. + # For an editable (in UI) version of this, use the editableSchemaMetadata aspect + globalTags=GlobalTagsClass( + tags=[TagAssociationClass(tag=make_tag_urn("location"))] + ), + # It is rare to attach glossary terms to fields as part of the technical schema unless you are purely reflecting state that exists in the source system. + # For an editable (in UI) version of this, use the editableSchemaMetadata aspect + glossaryTerms=GlossaryTermsClass( + terms=[ + GlossaryTermAssociationClass( + urn=make_term_urn("Classification.PII") + ) + ], + auditStamp=AuditStampClass( # represents the time when this term was attached to this field? 
+ time=0, # time in milliseconds, leave as 0 if no time of association is known + actor="urn:li:corpuser:ingestion", # if this is a system provided tag, use a bot user id like ingestion + ), + ), + ) + ], + ), +) + +# Create rest emitter +rest_emitter = DatahubRestEmitter(gms_server="http://localhost:8080") +rest_emitter.emit(event) diff --git a/metadata-ingestion/examples/library/dataset_set_tag.py b/metadata-ingestion/examples/library/dataset_set_tag.py new file mode 100644 index 0000000000000..e1b73e2521092 --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_set_tag.py @@ -0,0 +1,31 @@ +# Imports for urn construction utility methods +import logging + +from datahub.emitter.mce_builder import make_dataset_urn, make_tag_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.rest_emitter import DatahubRestEmitter + +# Imports for metadata model classes +from datahub.metadata.schema_classes import ( + ChangeTypeClass, + GlobalTagsClass, + TagAssociationClass, +) + +log = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + +dataset_urn = make_dataset_urn(platform="hive", name="realestate_db.sales", env="PROD") +tag_urn = make_tag_urn("purchase") +event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper( + entityType="dataset", + changeType=ChangeTypeClass.UPSERT, + entityUrn=dataset_urn, + aspectName="globalTags", + aspect=GlobalTagsClass(tags=[TagAssociationClass(tag=tag_urn)]), +) + +# Create rest emitter +rest_emitter = DatahubRestEmitter(gms_server="http://localhost:8080") +rest_emitter.emit(event) +log.info(f"Set tags to {tag_urn} for dataset {dataset_urn}") diff --git a/metadata-ingestion/examples/library/dataset_set_term.py b/metadata-ingestion/examples/library/dataset_set_term.py new file mode 100644 index 0000000000000..11c1bdcddf38a --- /dev/null +++ b/metadata-ingestion/examples/library/dataset_set_term.py @@ -0,0 +1,44 @@ +import logging + +from datahub.emitter.mce_builder import 
make_dataset_urn, make_term_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.rest_emitter import DatahubRestEmitter + +# Imports for metadata model classes +from datahub.metadata.schema_classes import ( + AuditStampClass, + ChangeTypeClass, + GlossaryTermAssociationClass, + GlossaryTermsClass, +) + +log = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + +# First we get the current terms +gms_endpoint = "http://localhost:8080" +rest_emitter = DatahubRestEmitter(gms_server=gms_endpoint) + +dataset_urn = make_dataset_urn(platform="hive", name="realestate_db.sales", env="PROD") + +term_to_add = make_term_urn("Classification.HighlyConfidential") +term_association_to_add = GlossaryTermAssociationClass(urn=term_to_add) +# an audit stamp that basically says we have no idea when these terms were added to this dataset +# change the time value to (time.time() * 1000) if you want to specify the current time of running this code as the time of the application +unknown_audit_stamp = AuditStampClass(time=0, actor="urn:li:corpuser:ingestion") + +# create a brand new terms aspect +terms_aspect = GlossaryTermsClass( + terms=[term_association_to_add], + auditStamp=unknown_audit_stamp, +) + +event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper( + entityType="dataset", + changeType=ChangeTypeClass.UPSERT, + entityUrn=dataset_urn, + aspectName="glossaryTerms", + aspect=terms_aspect, +) +rest_emitter.emit(event) +log.info(f"Attached term {term_to_add} to dataset {dataset_urn}") diff --git a/metadata-ingestion/scripts/modeldocgen.py b/metadata-ingestion/scripts/modeldocgen.py index 3a0c8f2f02929..ee8bc64ebeff5 100644 --- a/metadata-ingestion/scripts/modeldocgen.py +++ b/metadata-ingestion/scripts/modeldocgen.py @@ -1,13 +1,18 @@ +import glob import json import logging +import re import unittest.mock from dataclasses import Field, dataclass, field +from enum import Enum, auto from pathlib import Path -from typing 
import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Tuple, Union import avro.schema import click +from pydantic import validator +from datahub.configuration.common import ConfigModel from datahub.emitter.mce_builder import make_data_platform_urn, make_dataset_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.rest_emitter import DatahubRestEmitter @@ -40,6 +45,11 @@ def capitalize_first(something: str) -> str: return something[0:1].upper() + something[1:] +class EntityCategory(Enum): + CORE = auto() + INTERNAL = auto() + + @dataclass class EntityDefinition: name: str @@ -48,6 +58,10 @@ class EntityDefinition: aspect_map: Optional[Dict[str, Any]] = None relationship_map: Optional[Dict[str, str]] = None doc: Optional[str] = None + doc_file_contents: Optional[str] = None + # entities are by default in the CORE category unless specified otherwise + category: EntityCategory = EntityCategory.CORE + priority: Optional[int] = None # schema: Optional[avro.schema.Schema] = None # logical_schema: Optional[avro.schema.Schema] = None @@ -55,6 +69,12 @@ class EntityDefinition: # def lower_everything(cls, v: str) -> str: # return v.lower() + @validator("category", pre=True) + def find_match(cls, v: str) -> EntityCategory: + if isinstance(v, str) and v.upper() == "INTERNAL": + return EntityCategory.INTERNAL + return EntityCategory.CORE + @property def display_name(self): return capitalize_first(self.name) @@ -89,6 +109,9 @@ def get_aspects_from_snapshot( aspect_registry: Dict[str, AspectDefinition] = {} +# A holder for generated docs +generated_documentation: Dict[str, str] = {} + # Patch add_name method to NOT complain about duplicate names def add_name(self, name_attr, space_attr, new_schema): @@ -221,12 +244,48 @@ def make_entity_docs(entity_display_name: str, graph: RelationshipGraph) -> str: entity_name = entity_display_name[0:1].lower() + entity_display_name[1:] entity_def: Optional[EntityDefinition] = 
entity_registry.get(entity_name, None) if entity_def: - import pdb + doc = entity_def.doc_file_contents or ( + f"# {entity_def.display_name}\n{entity_def.doc}" + if entity_def.doc + else f"# {entity_def.display_name}" + ) + # create aspects section + aspects_section = "\n## Aspects\n" if entity_def.aspects else "" + + deprecated_aspects_section = "" + timeseries_aspects_section = "" + + for aspect in entity_def.aspects or []: + aspect_definition: AspectDefinition = aspect_registry.get(aspect) + assert aspect_definition + deprecated_message = ( + " (Deprecated)" + if aspect_definition.schema.get_prop("Deprecated") + else "" + ) + timeseries_qualifier = ( + " (Timeseries)" if aspect_definition.type == "timeseries" else "" + ) + this_aspect_doc = "" + this_aspect_doc += ( + f"\n### {aspect}{deprecated_message}{timeseries_qualifier}\n" + ) + this_aspect_doc += f"{aspect_definition.schema.get_prop('doc')}\n" + this_aspect_doc += f"
\nSchema\n\n" + # breakpoint() + this_aspect_doc += f"```javascript\n{json.dumps(aspect_definition.schema.to_json(), indent=2)}\n```\n
\n" + + if deprecated_message: + deprecated_aspects_section += this_aspect_doc + elif timeseries_qualifier: + timeseries_aspects_section += this_aspect_doc + else: + aspects_section += this_aspect_doc + + aspects_section += timeseries_aspects_section + deprecated_aspects_section - # breakpoint() - doc = entity_def.doc or f"This is the {entity_def.display_name} entity." # create relationships section - relationships_section = f"\n## Relationships\n" + relationships_section = "\n## Relationships\n" adjacency = graph.get_adjacency(entity_def.display_name) if adjacency.self_loop: relationships_section += f"\n### Self\nThese are the relationships to itself, stored in this entity's aspects" @@ -253,14 +312,15 @@ def make_entity_docs(entity_display_name: str, graph: RelationshipGraph) -> str: f"\n## [Global Metadata Model]({global_graph_url})" + f"\n![Global Graph]({global_graph_url})" ) - return doc + relationships_section + global_graph_section + final_doc = doc + aspects_section + relationships_section + global_graph_section + generated_documentation[entity_name] = final_doc + return final_doc else: raise Exception(f"Failed to find information for entity: {entity_name}") def generate_stitched_record(relnships_graph: RelationshipGraph) -> List[Any]: def strip_types(field_path: str) -> str: - import re final_path = field_path final_path = re.sub(r"(\[type=[a-zA-Z]+\]\.)", "", final_path) @@ -326,9 +386,6 @@ def strip_types(field_path: str) -> str: ) for f_field in schema_fields: if f_field.jsonProps: - import pdb - - # breakpoint() json_dict = json.loads(f_field.jsonProps) if "Aspect" in json_dict: aspect_info = json_dict["Aspect"] @@ -455,9 +512,6 @@ def strip_types(field_path: str) -> str: return events -from datahub.configuration.common import ConfigModel - - class EntityRegistry(ConfigModel): entities: List[EntityDefinition] @@ -467,30 +521,107 @@ def load_registry_file(registry_file: str) -> Dict[str, EntityDefinition]: with open(registry_file, "r") as f: registry = 
EntityRegistry.parse_obj(yaml.safe_load(f)) + index: int = 0 for entity_def in registry.entities: + index += 1 + entity_def.priority = index entity_registry[entity_def.name] = entity_def - return entity_registry +def get_sorted_entity_names( + entity_names: List[Tuple[str, EntityDefinition]] +) -> List[Tuple[str, List[str]]]: + core_entities = [ + (x, y) for (x, y) in entity_names if y.category == EntityCategory.CORE + ] + priority_bearing_core_entities = [(x, y) for (x, y) in core_entities if y.priority] + priority_bearing_core_entities.sort(key=lambda x: x[1].priority) + priority_bearing_core_entities = [x for (x, y) in priority_bearing_core_entities] + + non_priority_core_entities = [x for (x, y) in core_entities if not y.priority] + non_priority_core_entities.sort() + + internal_entities = [ + (x, y) for (x, y) in entity_names if y.category == EntityCategory.INTERNAL + ] + priority_bearing_internal_entities = [ + x for (x, y) in internal_entities if y.priority + ] + + non_priority_internal_entities = [ + x for (x, y) in internal_entities if not y.priority + ] + + sorted_entities = [ + ( + EntityCategory.CORE, + priority_bearing_core_entities + non_priority_core_entities, + ), + ( + EntityCategory.INTERNAL, + priority_bearing_internal_entities + non_priority_internal_entities, + ), + ] + + return sorted_entities + + +def preprocess_markdown(markdown_contents: str) -> str: + inline_pattern = re.compile(r"{{ inline (.*) }}") + pos = 0 + content_swap_register = {} + while inline_pattern.search(markdown_contents, pos=pos): + match = inline_pattern.search(markdown_contents, pos=pos) + file_name = match.group(1) + with open(file_name, "r") as fp: + inline_content = fp.read() + content_swap_register[match.span()] = inline_content + pos = match.span()[1] + processed_markdown = "" + cursor = 0 + for (start, end) in content_swap_register: + processed_markdown += ( + markdown_contents[cursor:start] + content_swap_register[(start, end)] + ) + cursor = end + 
processed_markdown += markdown_contents[cursor:] + return processed_markdown + + @click.command() @click.argument("schema_files", type=click.Path(exists=True), nargs=-1, required=True) @click.option("--server", type=str, required=False) @click.option("--file", type=str, required=False) -@click.option("--dot", type=str, required=False) +@click.option( + "--dot", type=str, required=False, help="generate a dot file representing the graph" +) @click.option("--png", type=str, required=False) +@click.option("--extra-docs", type=str, required=False) def generate( schema_files: List[str], server: Optional[str], file: Optional[str], dot: Optional[str], png: Optional[str], + extra_docs: Optional[str], ) -> None: logger.info(f"server = {server}") logger.info(f"file = {file}") logger.info(f"dot = {dot}") logger.info(f"png = {png}") + entity_extra_docs = {} + if extra_docs: + for path in glob.glob(f"{extra_docs}/**/*.md", recursive=True): + m = re.search("/docs/entities/(.*)/*.md", path) + if m: + entity_name = m.group(1) + with open(path, "r") as doc_file: + file_contents = doc_file.read() + final_markdown = preprocess_markdown(file_contents) + entity_extra_docs[entity_name] = final_markdown + for schema_file in schema_files: if schema_file.endswith(".yml") or schema_file.endswith(".yaml"): # registry file @@ -499,9 +630,42 @@ def generate( # schema file load_schema_file(schema_file) + if entity_extra_docs: + for entity_name in entity_extra_docs: + + entity_registry.get(entity_name).doc_file_contents = entity_extra_docs[ + entity_name + ] + relationship_graph = RelationshipGraph() events = generate_stitched_record(relationship_graph) + generated_docs_dir = "../docs/generated/metamodel" + import shutil + + shutil.rmtree(f"{generated_docs_dir}/entities", ignore_errors=True) + entity_names = [(x, entity_registry.get(x)) for x in generated_documentation] + + sorted_entity_names = get_sorted_entity_names(entity_names) + + index = 0 + for category, sorted_entities in 
sorted_entity_names: + for entity_name in sorted_entities: + entity_def = entity_registry.get(entity_name) + + entity_category = entity_def.category + entity_dir = f"{generated_docs_dir}/entities/" + import os + + os.makedirs(entity_dir, exist_ok=True) + + with open(f"{entity_dir}/{entity_name}.md", "w") as fp: + fp.write("---\n") + fp.write(f"sidebar_position: {index}\n") + fp.write("---\n") + fp.write(generated_documentation[entity_name]) + index += 1 + if file: logger.info(f"Will write events to {file}") Path(file).parent.mkdir(parents=True, exist_ok=True) diff --git a/metadata-ingestion/scripts/modeldocgen.sh b/metadata-ingestion/scripts/modeldocgen.sh index 77924c0c1e713..ec97845309070 100755 --- a/metadata-ingestion/scripts/modeldocgen.sh +++ b/metadata-ingestion/scripts/modeldocgen.sh @@ -6,8 +6,9 @@ OUTDIR=./generated/docs # Note: this assumes that datahub has already been built with `./gradlew build`. DATAHUB_ROOT=.. REGISTRY_ROOT="$DATAHUB_ROOT/metadata-models/src/main/resources" -SCHEMAS_ROOT="$DATAHUB_ROOT/metadata-events/mxe-schemas/src/mainGeneratedAvroSchema/avro" +SCHEMAS_ROOT="$DATAHUB_ROOT/metadata-events/mxe-schemas/src/mainGeneratedAvroSchema/avro/" FILES="$REGISTRY_ROOT/entity-registry.yml $SCHEMAS_ROOT/com/linkedin/mxe/MetadataChangeEvent.avsc" +METADATA_MODEL_DOCS_ROOT="$DATAHUB_ROOT/metadata-models/docs" # Since we depend on jq, check if jq is installed if ! which jq > /dev/null; then echo "jq is not installed. 
Please install jq and rerun (https://stedolan.github.io/jq/)" @@ -27,5 +28,6 @@ done FILES=$(cat /tmp/docgen_files.txt) rm -r $OUTDIR || true -#echo $FILES -python scripts/modeldocgen.py $FILES --dot generated/docs/metadata_graph.dot --file generated/docs/metadata_model_mces.json $@ +python scripts/modeldocgen.py $FILES --file generated/docs/metadata_model_mces.json --extra-docs ${METADATA_MODEL_DOCS_ROOT} $@ +## Full version of this command that generates dot files and png files (requires pydot and graphviz) +# python scripts/modeldocgen.py $FILES --dot generated/docs/metadata_graph.dot --file generated/docs/metadata_model_mces.json --extra-docs ${METADATA_MODEL_DOCS_ROOT} --png generated/docs/metadata_graph.png $@ diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 3d24c8eff855d..8b0ea342c8253 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -45,6 +45,10 @@ def get_long_description(): "tabulate", "progressbar2", "psutil>=5.8.0", + # Markupsafe breaking change broke Jinja and some other libs + # Pinning it to a version which works even though we are not using explicitly + # https://github.com/aws/aws-sam-cli/issues/3661 + "markupsafe==2.0.1", } kafka_common = { @@ -107,11 +111,17 @@ def get_long_description(): "azure-ad": set(), "bigquery": sql_common | bigquery_common | {"pybigquery >= 0.6.0"}, "bigquery-usage": bigquery_common | {"cachetools"}, + "clickhouse": sql_common | {"clickhouse-sqlalchemy==0.1.8"}, + "clickhouse-usage": sql_common | {"clickhouse-sqlalchemy==0.1.8"}, "datahub-business-glossary": set(), "data-lake": {*aws_common, "pydeequ==1.0.1", "pyspark==3.0.3", "parse==1.19.0"}, "dbt": {"requests"}, "druid": sql_common | {"pydruid>=0.6.2"}, - "elasticsearch": {"elasticsearch"}, + # Starting with 7.14.0 python client is checking if it is connected to elasticsearch client. 
If its not it throws + # UnsupportedProductError + # https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/release-notes.html#rn-7-14-0 + # https://github.com/elastic/elasticsearch-py/issues/1639#issuecomment-883587433 + "elasticsearch": {"elasticsearch==7.13.4"}, "feast": {"docker"}, "glue": aws_common, "hive": sql_common @@ -202,12 +212,14 @@ def get_long_description(): "jsonpickle", "build", "twine", - "pydot", + "packaging", *list( dependency for plugin in [ "bigquery", "bigquery-usage", + "clickhouse", + "clickhouse-usage", "elasticsearch", "looker", "glue", @@ -260,6 +272,7 @@ def get_long_description(): for plugin in [ # Only include Athena for Python 3.7 or newer. *(["athena"] if is_py37_or_newer else []), + "clickhouse", "druid", "feast", "hive", @@ -285,6 +298,8 @@ def get_long_description(): "azure-ad = datahub.ingestion.source.identity.azure_ad:AzureADSource", "bigquery = datahub.ingestion.source.sql.bigquery:BigQuerySource", "bigquery-usage = datahub.ingestion.source.usage.bigquery_usage:BigQueryUsageSource", + "clickhouse = datahub.ingestion.source.sql.clickhouse:ClickHouseSource", + "clickhouse-usage = datahub.ingestion.source.usage.clickhouse_usage:ClickHouseUsageSource", "data-lake = datahub.ingestion.source.data_lake:DataLakeSource", "dbt = datahub.ingestion.source.dbt:DBTSource", "druid = datahub.ingestion.source.sql.druid:DruidSource", diff --git a/metadata-ingestion/source_docs/clickhouse.md b/metadata-ingestion/source_docs/clickhouse.md new file mode 100644 index 0000000000000..fe3cd8429d9dc --- /dev/null +++ b/metadata-ingestion/source_docs/clickhouse.md @@ -0,0 +1,177 @@ +# ClickHouse + +For context on getting started with ingestion, check out our [metadata ingestion guide](../README.md). + +## Setup + +To install this plugin, run `pip install 'acryl-datahub[clickhouse]'`. 
+ +## Capabilities + +This plugin extracts the following: + +- Metadata for tables, views, materialized views and dictionaries +- Column types associated with each table (except *AggregateFunction and DateTime with timezone) +- Table, row, and column statistics via optional [SQL profiling](./sql_profiles.md) +- Table, view, materialized view and dictionary (with CLICKHOUSE source_type) lineage + +:::tip + +You can also get fine-grained usage statistics for ClickHouse using the `clickhouse-usage` source described below. + +::: + +## Quickstart recipe + +Check out the following recipe to get started with ingestion! See [below](#config-details) for full configuration options. + +For general pointers on writing and running a recipe, see our [main recipe guide](../README.md#recipes). + +```yml +source: + type: clickhouse + config: + # Coordinates + host_port: localhost:9000 + + # Credentials + username: user + password: pass + + # Options + platform_instance: DatabaseNameToBeIngested + + include_views: True # whether to include views, defaults to True + include_tables: True # whether to include tables, defaults to True + +sink: + # sink configs +``` +
+ Extra options to use encryption connection or different interface + +For the HTTP interface: +```yml +source: + type: clickhouse + config: + host_port: localhost:8443 + protocol: https + +``` + +For the Native interface: +```yml +source: + type: clickhouse + config: + host_port: localhost:9440 + scheme: clickhouse+native + secure: True +``` + +
+ +## Config details + +Like all SQL-based sources, the ClickHouse integration supports: +- Stale Metadata Deletion: See [here](./stateful_ingestion.md) for more details on configuration. +- SQL Profiling: See [here](./sql_profiles.md) for more details on configuration. + +Note that a `.` is used to denote nested fields in the YAML recipe. + +| Field | Required | Default | Description | +|-----------------------------|----------|----------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `username` | | | ClickHouse username. | +| `password` | | | ClickHouse password. | +| `host_port` | ✅ | | ClickHouse host URL. | +| `database` | | | ClickHouse database to connect. | +| `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. | +| `platform_instance` | | None | The Platform instance to use while constructing URNs. | +| `options.