From 82204eb451c0dd47e08a03ea44508b9b78aa5407 Mon Sep 17 00:00:00 2001 From: Jay <159848059+jayacryl@users.noreply.github.com> Date: Mon, 17 Jun 2024 15:37:12 -0400 Subject: [PATCH 1/6] feat(observe) expose assertion runId and lastObservedMillis to graphql (#10726) --- .../types/dataset/mappers/AssertionRunEventMapper.java | 1 + datahub-graphql-core/src/main/resources/entity.graphql | 7 ++++++- .../resolvers/assertion/AssertionRunEventResolverTest.java | 6 +++++- datahub-web-react/src/graphql/assertion.graphql | 2 ++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/AssertionRunEventMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/AssertionRunEventMapper.java index e63335beef9c1..89e636e606601 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/AssertionRunEventMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/AssertionRunEventMapper.java @@ -38,6 +38,7 @@ public com.linkedin.datahub.graphql.generated.AssertionRunEvent apply( final com.linkedin.datahub.graphql.generated.AssertionRunEvent assertionRunEvent = new com.linkedin.datahub.graphql.generated.AssertionRunEvent(); + assertionRunEvent.setLastObservedMillis(envelopedAspect.getSystemMetadata().getLastObserved()); assertionRunEvent.setTimestampMillis(gmsAssertionRunEvent.getTimestampMillis()); assertionRunEvent.setAssertionUrn(gmsAssertionRunEvent.getAssertionUrn().toString()); assertionRunEvent.setAsserteeUrn(gmsAssertionRunEvent.getAsserteeUrn().toString()); diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 316bdd7ef5279..fa774d34ed7a4 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -7404,6 +7404,11 @@ 
type AssertionRunEvent implements TimeSeriesAspect { """ timestampMillis: Long! + """ + The time at which the run event was last observed by the DataHub system - ie, when it was reported by external systems + """ + lastObservedMillis: Long + """ Urn of assertion which is evaluated """ @@ -7420,7 +7425,7 @@ type AssertionRunEvent implements TimeSeriesAspect { runId: String! """ - The status of the assertion run as per this timeseries event. + The status of the assertion run as per this timeseries event """ status: AssertionRunStatus! diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolverTest.java index c047a0d0a3f05..845ce1e6129d8 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolverTest.java @@ -18,6 +18,7 @@ import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.mxe.SystemMetadata; import graphql.schema.DataFetchingEnvironment; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -58,7 +59,9 @@ public void testGetSuccess() throws Exception { null, AssertionRunStatus.COMPLETE.toString())))) .thenReturn( ImmutableList.of( - new EnvelopedAspect().setAspect(GenericRecordUtils.serializeAspect(gmsRunEvent)))); + new EnvelopedAspect() + .setAspect(GenericRecordUtils.serializeAspect(gmsRunEvent)) + .setSystemMetadata(new SystemMetadata().setLastObserved(12L)))); AssertionRunEventResolver resolver = new AssertionRunEventResolver(mockClient); @@ -108,6 +111,7 @@ public void testGetSuccess() throws Exception { graphqlRunEvent.getStatus(), 
com.linkedin.datahub.graphql.generated.AssertionRunStatus.COMPLETE); assertEquals((float) graphqlRunEvent.getTimestampMillis(), 12L); + assertEquals((float) graphqlRunEvent.getLastObservedMillis(), 12L); assertEquals((float) graphqlRunEvent.getResult().getActualAggValue(), 10); assertEquals((long) graphqlRunEvent.getResult().getMissingCount(), 0L); assertEquals((long) graphqlRunEvent.getResult().getRowCount(), 1L); diff --git a/datahub-web-react/src/graphql/assertion.graphql b/datahub-web-react/src/graphql/assertion.graphql index 016e8a4f06086..8cf61038e4dab 100644 --- a/datahub-web-react/src/graphql/assertion.graphql +++ b/datahub-web-react/src/graphql/assertion.graphql @@ -55,8 +55,10 @@ fragment assertionDetails on Assertion { fragment assertionRunEventDetails on AssertionRunEvent { timestampMillis + lastObservedMillis assertionUrn status + runId runtimeContext { key value From 0816e7590c2ee9d2c34ef5b071acb254e8486ddf Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 17 Jun 2024 13:50:08 -0700 Subject: [PATCH 2/6] fix(ingest): pin numpy<2 for classification (#10725) --- metadata-ingestion/setup.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 7f51b39c2731b..04099e0a24b9f 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -107,6 +107,10 @@ # This is a bit of a hack. Because we download the SpaCy model at runtime in the classify plugin, # we need pip to be available. "pip", + # We were seeing an error like this `numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject` + # with numpy 2.0. This likely indicates a mismatch between scikit-learn and numpy versions. 
+ # https://stackoverflow.com/questions/40845304/runtimewarning-numpy-dtype-size-changed-may-indicate-binary-incompatibility + "numpy<2", } sql_common = ( From 39081ece15c8965ae94f0680cd7803831f4a24c4 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 17 Jun 2024 13:54:28 -0700 Subject: [PATCH 3/6] feat(ingest/bigquery): support using table read permission without profiling (#10699) --- .../src/datahub/ingestion/source/bigquery_v2/bigquery.py | 6 +++--- .../ingestion/source/bigquery_v2/bigquery_config.py | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index f9004b9ba9f86..d2d4f13ceb187 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -365,7 +365,7 @@ def metadata_read_capability_test( project_id=project_id, dataset_name=result[0].name, tables={}, - with_data_read_permission=config.is_profiling_enabled(), + with_data_read_permission=config.have_table_data_read_permission, ) if len(list(tables)) == 0: return CapabilityReport( @@ -1380,7 +1380,7 @@ def get_tables_for_dataset( project_id, dataset_name, items_to_get, - with_data_read_permission=self.config.is_profiling_enabled(), + with_data_read_permission=self.config.have_table_data_read_permission, ) items_to_get.clear() @@ -1389,7 +1389,7 @@ def get_tables_for_dataset( project_id, dataset_name, items_to_get, - with_data_read_permission=self.config.is_profiling_enabled(), + with_data_read_permission=self.config.have_table_data_read_permission, ) self.report.metadata_extraction_sec[f"{project_id}.{dataset_name}"] = round( diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index 2c7a53ef2bdca..578c9dddbd2e4 
100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -184,6 +184,15 @@ class BigQueryV2Config( description="Number of partitioned table queried in batch when getting metadata. This is a low level config property which should be touched with care. This restriction is needed because we query partitions system view which throws error if we try to touch too many tables.", ) + use_tables_list_query_v2: bool = Field( + default=False, + description="List tables using an improved query that extracts partitions and last modified timestamps more accurately. Requires the ability to read table data. Automatically enabled when profiling is enabled.", + ) + + @property + def have_table_data_read_permission(self) -> bool: + return self.use_tables_list_query_v2 or self.is_profiling_enabled() + column_limit: int = Field( default=300, description="Maximum number of columns to process in a table. This is a low level config property which should be touched with care. 
This restriction is needed because excessively wide tables can result in failure to ingest the schema.", From 07df5f872a49318d695c6dc48ec551fbc25bdd61 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 17 Jun 2024 13:55:08 -0700 Subject: [PATCH 4/6] fix(ingest/looker): fix looker browse paths v2 (#10700) --- .../ingestion/source/looker/looker_common.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index ec78b15348701..b53fe27745fc6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -1137,16 +1137,9 @@ def _to_metadata_events( # noqa: C901 ] # Add tags - explore_tag_urns: List[TagAssociationClass] = [] - for tag in self.tags: - tag_urn = TagUrn(tag) - explore_tag_urns.append(TagAssociationClass(tag_urn.urn())) - proposals.append( - MetadataChangeProposalWrapper( - entityUrn=tag_urn.urn(), - aspect=tag_urn.to_key_aspect(), - ) - ) + explore_tag_urns: List[TagAssociationClass] = [ + TagAssociationClass(tag=TagUrn(tag).urn()) for tag in self.tags + ] if explore_tag_urns: dataset_snapshot.aspects.append(GlobalTagsClass(explore_tag_urns)) From 333799c338d386db453e579586503e1d6f779612 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Mon, 17 Jun 2024 19:53:54 -0500 Subject: [PATCH 5/6] feat(strucutred-properties): structured properties delete and schema change support (#10711) --- .../graphql/resolvers/ResolverUtils.java | 36 +- .../assertion/AssertionRunEventResolver.java | 12 +- .../auth/ListAccessTokensResolver.java | 5 +- .../resolvers/chart/BrowseV2Resolver.java | 4 +- .../ListDataProductAssetsResolver.java | 5 +- .../domain/DomainEntitiesResolver.java | 4 +- .../CreateDynamicFormAssignmentResolver.java | 4 +- 
.../source/ListIngestionSourcesResolver.java | 5 +- .../load/TimeSeriesAspectResolver.java | 8 +- .../resolvers/mutate/util/FormUtils.java | 9 +- .../ownership/ListOwnershipTypesResolver.java | 5 +- .../policy/ListPoliciesResolver.java | 6 +- .../resolvers/query/ListQueriesResolver.java | 8 +- .../ListRecommendationsResolver.java | 10 +- .../AggregateAcrossEntitiesResolver.java | 4 +- .../search/AutoCompleteResolver.java | 8 +- .../resolvers/search/AutocompleteUtils.java | 7 +- .../search/ScrollAcrossEntitiesResolver.java | 4 +- .../search/ScrollAcrossLineageResolver.java | 5 +- .../search/SearchAcrossEntitiesResolver.java | 5 +- .../search/SearchAcrossLineageResolver.java | 5 +- .../resolvers/search/SearchResolver.java | 5 +- .../resolvers/view/CreateViewResolver.java | 4 +- .../view/ListGlobalViewsResolver.java | 8 +- .../resolvers/view/ListMyViewsResolver.java | 13 +- .../resolvers/view/UpdateViewResolver.java | 3 +- .../graphql/resolvers/view/ViewUtils.java | 25 +- .../graphql/resolvers/ResolverUtilsTest.java | 14 +- .../AssertionRunEventResolverTest.java | 6 +- .../auth/ListAccessTokensResolverTest.java | 2 +- .../browse/BrowseV2ResolverTest.java | 4 +- .../query/ListQueriesResolverTest.java | 2 +- .../graphql/resolvers/view/ViewUtilsTest.java | 4 +- datahub-upgrade/build.gradle | 21 + .../upgrade/config/BuildIndicesConfig.java | 7 +- .../upgrade/config/CleanIndicesConfig.java | 7 +- .../system/elasticsearch/BuildIndices.java | 75 +- .../system/elasticsearch/CleanIndices.java | 20 +- .../steps/BuildIndicesPostStep.java | 15 +- .../steps/BuildIndicesPreStep.java | 71 +- .../elasticsearch/steps/BuildIndicesStep.java | 11 +- .../elasticsearch/steps/CleanIndicesStep.java | 11 +- .../system/elasticsearch/util/IndexUtils.java | 21 +- docs/api/tutorials/structured-properties.md | 1024 ++++++++++++++++- docs/deploy/environment-vars.md | 36 +- entity-registry/build.gradle | 4 + .../metadata/aspect/RetrieverContext.java | 4 + .../metadata/aspect/batch/AspectsBatch.java 
| 10 + .../aspect/plugins/PluginFactory.java | 25 + .../aspect/plugins/hooks/MCPSideEffect.java | 36 + .../metadata/entity/SearchRetriever.java | 24 + .../models/StructuredPropertyUtils.java | 167 ++- .../plugins/hooks/MCPSideEffectTest.java | 8 + .../metadata/aspect/MockAspectRetriever.java | 5 +- .../metadata/aspect/TestEntityRegistry.java | 0 .../test/metadata/aspect/batch/TestMCL.java | 32 + .../test/metadata/aspect/batch/TestMCP.java | 0 .../java/com/linkedin/metadata/Constants.java | 5 + metadata-ingestion/scripts/modeldocgen.py | 1 + metadata-io/build.gradle | 2 + .../entity/ebean/batch/PatchItemImpl.java | 26 + .../aspect/utils/DefaultAspectsUtil.java | 2 +- .../metadata/entity/EntityServiceImpl.java | 82 +- .../graph/dgraph/DgraphGraphService.java | 3 - .../elastic/ElasticSearchGraphService.java | 19 +- .../graph/neo4j/Neo4jGraphService.java | 5 - .../candidatesource/MostPopularSource.java | 4 +- .../candidatesource/RecentlyEditedSource.java | 13 +- .../candidatesource/RecentlyViewedSource.java | 13 +- .../search/SearchServiceSearchRetriever.java | 51 + .../elasticsearch/ElasticSearchService.java | 21 +- .../indexbuilder/ESIndexBuilder.java | 8 +- .../indexbuilder/EntityIndexBuilders.java | 32 +- .../indexbuilder/MappingsBuilder.java | 28 +- .../indexbuilder/ReindexConfig.java | 146 ++- .../elasticsearch/query/ESBrowseDAO.java | 12 +- .../elasticsearch/query/ESSearchDAO.java | 38 +- .../request/AggregationQueryBuilder.java | 70 +- .../request/AutocompleteRequestHandler.java | 17 +- .../query/request/SearchQueryBuilder.java | 25 +- .../query/request/SearchRequestHandler.java | 19 +- .../SearchDocumentTransformer.java | 63 +- .../metadata/search/utils/ESUtils.java | 194 ++-- .../BusinessAttributeUpdateHookService.java | 2 +- .../service/UpdateIndicesService.java | 32 +- .../metadata/shared/ElasticSearchIndexed.java | 16 +- .../PropertyDefinitionDeleteSideEffect.java | 201 ++++ .../hooks/StructuredPropertiesSoftDelete.java | 2 +- 
.../PropertyDefinitionValidator.java | 97 +- .../StructuredPropertiesValidator.java | 39 +- .../ElasticSearchSystemMetadataService.java | 20 +- .../ElasticSearchTimeseriesAspectService.java | 50 +- .../TimeseriesAspectIndexBuilders.java | 14 +- .../elastic/query/ESAggregatedStatsDAO.java | 12 +- .../metadata/AspectIngestionUtils.java | 6 +- .../entity/CassandraEntityServiceTest.java | 4 +- .../entity/EbeanEntityServiceTest.java | 4 +- .../metadata/entity/EntityServiceTest.java | 52 +- .../graph/dgraph/DgraphContainer.java | 3 - .../search/SearchGraphServiceTestBase.java | 4 +- .../search/LineageServiceTestBase.java | 2 +- .../search/SearchServiceTestBase.java | 3 +- .../metadata/search/TestEntityTestBase.java | 3 +- .../indexbuilder/IndexBuilderTestBase.java | 9 + .../indexbuilder/MappingsBuilderTest.java | 176 ++- .../request/AggregationQueryBuilderTest.java | 201 +++- .../AutocompleteRequestHandlerTest.java | 23 +- .../request/SearchRequestHandlerTest.java | 36 +- .../SearchDocumentTransformerTest.java | 5 + .../metadata/search/utils/ESUtilsTest.java | 188 ++- ...ropertyDefinitionDeleteSideEffectTest.java | 193 ++++ .../StructuredPropertiesSoftDeleteTest.java | 2 +- .../PropertyDefinitionValidatorTest.java | 187 +-- .../StructuredPropertiesValidatorTest.java | 4 +- .../SystemMetadataServiceTestBase.java | 3 +- .../TimeseriesAspectServiceTestBase.java | 3 +- .../SampleDataFixtureConfiguration.java | 4 +- .../SearchLineageFixtureConfiguration.java | 7 +- .../test/search/SearchTestUtils.java | 4 +- .../SearchTestContainerConfiguration.java | 1 + .../kafka/MaeConsumerApplication.java | 1 + .../hook/BusinessAttributeUpdateHookTest.java | 4 +- .../CustomDataQualityRulesMCPSideEffect.java | 8 + .../StructuredPropertyDefinition.pdl | 8 + .../src/main/resources/entity-registry.yml | 27 +- .../metadata/context/OperationContext.java | 10 + .../metadata/context/RetrieverContext.java | 2 + .../context/TestOperationContexts.java | 37 +- .../token/StatefulTokenServiceTest.java 
| 3 +- .../SystemOperationContextFactory.java | 20 +- .../ElasticSearchIndexBuilderFactory.java | 4 + .../IngestDataPlatformInstancesStep.java | 2 +- .../controller/GenericEntitiesController.java | 18 +- .../resources/entity/AspectResourceTest.java | 2 +- .../mock/MockTimeseriesAspectService.java | 3 - .../metadata/entity/EntityService.java | 10 +- .../metadata/entity/RollbackResult.java | 23 + .../metadata/entity/RollbackRunResult.java | 1 + .../metadata/entity/UpdateAspectResult.java | 33 +- .../linkedin/metadata/graph/GraphService.java | 2 +- .../metadata/search/EntitySearchService.java | 2 +- .../systemmetadata/SystemMetadataService.java | 2 +- .../timeseries/TimeseriesAspectService.java | 2 +- .../gms/servlet/ConfigSearchExport.java | 3 +- .../test_structured_properties.py | 133 ++- 145 files changed, 3826 insertions(+), 928 deletions(-) create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java rename entity-registry/src/{test => testFixtures}/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java (96%) rename entity-registry/src/{test => testFixtures}/java/com/linkedin/test/metadata/aspect/TestEntityRegistry.java (100%) create mode 100644 entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCL.java rename entity-registry/src/{test => testFixtures}/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java (100%) create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java rename {entity-registry/src/main/java/com/linkedin/metadata/aspect => metadata-io/src/main/java/com/linkedin/metadata/structuredproperties}/hooks/StructuredPropertiesSoftDelete.java (96%) rename {entity-registry/src/main/java/com/linkedin/metadata/aspect => 
metadata-io/src/main/java/com/linkedin/metadata/structuredproperties}/validation/PropertyDefinitionValidator.java (68%) rename {entity-registry/src/main/java/com/linkedin/metadata/aspect => metadata-io/src/main/java/com/linkedin/metadata/structuredproperties}/validation/StructuredPropertiesValidator.java (94%) create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java rename {entity-registry/src/test/java/com/linkedin/metadata/aspect => metadata-io/src/test/java/com/linkedin/metadata/structuredproperties}/hooks/StructuredPropertiesSoftDeleteTest.java (98%) rename {entity-registry/src/test/java/com/linkedin/metadata/aspect => metadata-io/src/test/java/com/linkedin/metadata/structuredproperties}/validators/PropertyDefinitionValidatorTest.java (70%) rename {entity-registry/src/test/java/com/linkedin/metadata/aspect => metadata-io/src/test/java/com/linkedin/metadata/structuredproperties}/validators/StructuredPropertiesValidatorTest.java (99%) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java index f2682ad050c86..542745e014862 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java @@ -15,6 +15,7 @@ import com.linkedin.datahub.graphql.generated.AndFilterInput; import com.linkedin.datahub.graphql.generated.FacetFilterInput; import com.linkedin.datahub.graphql.resolvers.search.SearchUtils; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -112,10 +113,11 @@ public static Map buildFacetFilters( return 
facetFilters; } - public static List criterionListFromAndFilter(List andFilters) { + public static List criterionListFromAndFilter( + List andFilters, @Nullable AspectRetriever aspectRetriever) { return andFilters != null && !andFilters.isEmpty() ? andFilters.stream() - .map(filter -> criterionFromFilter(filter)) + .map(filter -> criterionFromFilter(filter, aspectRetriever)) .collect(Collectors.toList()) : Collections.emptyList(); } @@ -124,13 +126,14 @@ public static List criterionListFromAndFilter(List // conjunctive criterion // arrays, rather than just one for the AND case. public static ConjunctiveCriterionArray buildConjunctiveCriterionArrayWithOr( - @Nonnull List orFilters) { + @Nonnull List orFilters, @Nullable AspectRetriever aspectRetriever) { return new ConjunctiveCriterionArray( orFilters.stream() .map( orFilter -> { CriterionArray andCriterionForOr = - new CriterionArray(criterionListFromAndFilter(orFilter.getAnd())); + new CriterionArray( + criterionListFromAndFilter(orFilter.getAnd(), aspectRetriever)); return new ConjunctiveCriterion().setAnd(andCriterionForOr); }) .collect(Collectors.toList())); @@ -138,7 +141,9 @@ public static ConjunctiveCriterionArray buildConjunctiveCriterionArrayWithOr( @Nullable public static Filter buildFilter( - @Nullable List andFilters, @Nullable List orFilters) { + @Nullable List andFilters, + @Nullable List orFilters, + @Nullable AspectRetriever aspectRetriever) { if ((andFilters == null || andFilters.isEmpty()) && (orFilters == null || orFilters.isEmpty())) { return null; @@ -147,30 +152,33 @@ public static Filter buildFilter( // Or filters are the new default. We will check them first. 
// If we have OR filters, we need to build a series of CriterionArrays if (orFilters != null && !orFilters.isEmpty()) { - return new Filter().setOr(buildConjunctiveCriterionArrayWithOr(orFilters)); + return new Filter().setOr(buildConjunctiveCriterionArrayWithOr(orFilters, aspectRetriever)); } // If or filters are not set, someone may be using the legacy and filters - final List andCriterions = criterionListFromAndFilter(andFilters); + final List andCriterions = criterionListFromAndFilter(andFilters, aspectRetriever); return new Filter() .setOr( new ConjunctiveCriterionArray( new ConjunctiveCriterion().setAnd(new CriterionArray(andCriterions)))); } - public static Criterion criterionFromFilter(final FacetFilterInput filter) { - return criterionFromFilter(filter, false); + public static Criterion criterionFromFilter( + final FacetFilterInput filter, @Nullable AspectRetriever aspectRetriever) { + return criterionFromFilter(filter, false, aspectRetriever); } // Translates a FacetFilterInput (graphql input class) into Criterion (our internal model) public static Criterion criterionFromFilter( - final FacetFilterInput filter, final Boolean skipKeywordSuffix) { + final FacetFilterInput filter, + final Boolean skipKeywordSuffix, + @Nullable AspectRetriever aspectRetriever) { Criterion result = new Criterion(); if (skipKeywordSuffix) { result.setField(filter.getField()); } else { - result.setField(getFilterField(filter.getField(), skipKeywordSuffix)); + result.setField(getFilterField(filter.getField(), skipKeywordSuffix, aspectRetriever)); } // `value` is deprecated in place of `values`- this is to support old query patterns. 
If values @@ -205,11 +213,13 @@ public static Criterion criterionFromFilter( } private static String getFilterField( - final String originalField, final boolean skipKeywordSuffix) { + final String originalField, + final boolean skipKeywordSuffix, + @Nullable AspectRetriever aspectRetriever) { if (KEYWORD_EXCLUDED_FILTERS.contains(originalField)) { return originalField; } - return ESUtils.toKeywordField(originalField, skipKeywordSuffix); + return ESUtils.toKeywordField(originalField, skipKeywordSuffix, aspectRetriever); } public static Filter buildFilterWithUrns(@Nonnull Set urns, @Nullable Filter inputFilters) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java index 18f8ad85668d8..0e9d2cea61141 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java @@ -15,6 +15,7 @@ import com.linkedin.datahub.graphql.types.dataset.mappers.AssertionRunEventMapper; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -67,7 +68,10 @@ public CompletableFuture get(DataFetchingEnvironment e maybeStartTimeMillis, maybeEndTimeMillis, maybeLimit, - buildFilter(maybeFilters, maybeStatus)); + buildFilter( + maybeFilters, + maybeStatus, + context.getOperationContext().getAspectRetriever())); // Step 2: Bind profiles into GraphQL strong types. 
List runEvents = @@ -120,7 +124,9 @@ public CompletableFuture get(DataFetchingEnvironment e @Nullable public static Filter buildFilter( - @Nullable FilterInput filtersInput, @Nullable final String status) { + @Nullable FilterInput filtersInput, + @Nullable final String status, + @Nullable AspectRetriever aspectRetriever) { if (filtersInput == null && status == null) { return null; } @@ -141,7 +147,7 @@ public static Filter buildFilter( .setAnd( new CriterionArray( facetFilters.stream() - .map(filter -> criterionFromFilter(filter, true)) + .map(filter -> criterionFromFilter(filter, true, aspectRetriever)) .collect(Collectors.toList()))))); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java index eaac2aedef03a..dc57ed3c673c1 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java @@ -70,7 +70,10 @@ public CompletableFuture get(DataFetchingEnvironment envi .withSearchFlags(flags -> flags.setFulltext(true)), Constants.ACCESS_TOKEN_ENTITY_NAME, "", - buildFilter(filters, Collections.emptyList()), + buildFilter( + filters, + Collections.emptyList(), + context.getOperationContext().getAspectRetriever()), sortCriterion, start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java index 18ee5f595ce58..b54ca398aef98 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java @@ -74,7 
+74,9 @@ public CompletableFuture get(DataFetchingEnvironment environmen ? BROWSE_PATH_V2_DELIMITER + String.join(BROWSE_PATH_V2_DELIMITER, input.getPath()) : ""; - final Filter inputFilter = ResolverUtils.buildFilter(null, input.getOrFilters()); + final Filter inputFilter = + ResolverUtils.buildFilter( + null, input.getOrFilters(), context.getOperationContext().getAspectRetriever()); BrowseResultV2 browseResults = _entityClient.browseV2( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java index 041de9f58db23..320d89cdec164 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java @@ -132,7 +132,10 @@ public CompletableFuture get(DataFetchingEnvironment environment) // add urns from the aspect to our filters final Filter baseFilter = - ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()); + ResolverUtils.buildFilter( + input.getFilters(), + input.getOrFilters(), + context.getOperationContext().getAspectRetriever()); final Filter finalFilter = buildFilterWithUrns(new HashSet<>(assetUrns), baseFilter); final SearchFlags searchFlags; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java index 9ca5de86034da..75796f637525e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java @@ -78,7 +78,9 @@ public 
CompletableFuture get(final DataFetchingEnvironment enviro .getFilters() .forEach( filter -> { - criteria.add(criterionFromFilter(filter, true)); + criteria.add( + criterionFromFilter( + filter, true, context.getOperationContext().getAspectRetriever())); }); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/CreateDynamicFormAssignmentResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/CreateDynamicFormAssignmentResolver.java index b9d74f8af660e..3cf4d9175d45b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/CreateDynamicFormAssignmentResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/CreateDynamicFormAssignmentResolver.java @@ -33,7 +33,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final CreateDynamicFormAssignmentInput input = bindArgument(environment.getArgument("input"), CreateDynamicFormAssignmentInput.class); final Urn formUrn = UrnUtils.getUrn(input.getFormUrn()); - final DynamicFormAssignment formAssignment = FormUtils.mapDynamicFormAssignment(input); + final DynamicFormAssignment formAssignment = + FormUtils.mapDynamicFormAssignment( + input, context.getOperationContext().getAspectRetriever()); return GraphQLConcurrencyUtils.supplyAsync( () -> { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourcesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourcesResolver.java index 8ead47aa65ceb..1a2806224e4a9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourcesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourcesResolver.java @@ -68,7 +68,10 @@ public CompletableFuture get( 
.withSearchFlags(flags -> flags.setFulltext(true)), Constants.INGESTION_SOURCE_ENTITY_NAME, query, - buildFilter(filters, Collections.emptyList()), + buildFilter( + filters, + Collections.emptyList(), + context.getOperationContext().getAspectRetriever()), null, start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java index 1839fd3cc5705..8fc26e3cec9d0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java @@ -11,6 +11,7 @@ import com.linkedin.datahub.graphql.generated.TimeSeriesAspect; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; @@ -120,7 +121,7 @@ public CompletableFuture> get(DataFetchingEnvironment env maybeStartTimeMillis, maybeEndTimeMillis, maybeLimit, - buildFilters(maybeFilters), + buildFilters(maybeFilters, context.getOperationContext().getAspectRetriever()), maybeSort); // Step 2: Bind profiles into GraphQL strong types. 
@@ -135,7 +136,8 @@ public CompletableFuture> get(DataFetchingEnvironment env "get"); } - private Filter buildFilters(@Nullable FilterInput maybeFilters) { + private Filter buildFilters( + @Nullable FilterInput maybeFilters, @Nullable AspectRetriever aspectRetriever) { if (maybeFilters == null) { return null; } @@ -146,7 +148,7 @@ private Filter buildFilters(@Nullable FilterInput maybeFilters) { .setAnd( new CriterionArray( maybeFilters.getAnd().stream() - .map(filter -> criterionFromFilter(filter, true)) + .map(filter -> criterionFromFilter(filter, true, aspectRetriever)) .collect(Collectors.toList()))))); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java index 9a06682c87f78..6caa858460c2f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java @@ -6,6 +6,7 @@ import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.form.DynamicFormAssignment; import com.linkedin.form.FormInfo; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -15,6 +16,7 @@ import com.linkedin.structured.PrimitivePropertyValueArray; import java.util.Objects; import javax.annotation.Nonnull; +import javax.annotation.Nullable; public class FormUtils { @@ -44,13 +46,16 @@ public static PrimitivePropertyValueArray getStructuredPropertyValuesFromInput( /** Map a GraphQL CreateDynamicFormAssignmentInput to the GMS DynamicFormAssignment aspect */ @Nonnull public static DynamicFormAssignment mapDynamicFormAssignment( - @Nonnull final 
CreateDynamicFormAssignmentInput input) { + @Nonnull final CreateDynamicFormAssignmentInput input, + @Nullable AspectRetriever aspectRetriever) { Objects.requireNonNull(input, "input must not be null"); final DynamicFormAssignment result = new DynamicFormAssignment(); final Filter filter = new Filter() - .setOr(ResolverUtils.buildConjunctiveCriterionArrayWithOr(input.getOrFilters())); + .setOr( + ResolverUtils.buildConjunctiveCriterionArrayWithOr( + input.getOrFilters(), aspectRetriever)); result.setFilter(filter); return result; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java index 0f0bb299eda83..9f6951e44dd73 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java @@ -63,7 +63,10 @@ public CompletableFuture get(DataFetchingEnvironment e context.getOperationContext().withSearchFlags(flags -> flags.setFulltext(true)), Constants.OWNERSHIP_TYPE_ENTITY_NAME, query, - buildFilter(filters, Collections.emptyList()), + buildFilter( + filters, + Collections.emptyList(), + context.getOperationContext().getAspectRetriever()), DEFAULT_SORT_CRITERION, start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java index 4120401e0150f..ce11451aa1913 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java @@ -59,7 +59,11 @@ public 
CompletableFuture get(final DataFetchingEnvironment e log.debug( "User {} listing policies with filters {}", context.getActorUrn(), filters.toString()); - final Filter filter = ResolverUtils.buildFilter(facetFilters, Collections.emptyList()); + final Filter filter = + ResolverUtils.buildFilter( + facetFilters, + Collections.emptyList(), + context.getOperationContext().getAspectRetriever()); return _policyFetcher .fetchPolicies(context.getOperationContext(), start, query, count, filter) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java index b0d84942b1257..95be3a68e895c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java @@ -15,6 +15,7 @@ import com.linkedin.datahub.graphql.generated.ListQueriesResult; import com.linkedin.datahub.graphql.generated.QueryEntity; import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; @@ -72,7 +73,7 @@ public CompletableFuture get(final DataFetchingEnvironment en flags -> flags.setFulltext(true).setSkipHighlighting(true)), QUERY_ENTITY_NAME, query, - buildFilters(input), + buildFilters(input, context.getOperationContext().getAspectRetriever()), sortCriterion, start, count); @@ -109,7 +110,8 @@ private List mapUnresolvedQueries(final List queryUrns) { } @Nullable - private Filter buildFilters(@Nonnull final ListQueriesInput input) { + private Filter buildFilters( + @Nonnull final ListQueriesInput input, @Nullable AspectRetriever aspectRetriever) { final AndFilterInput criteria = new 
AndFilterInput(); List andConditions = new ArrayList<>(); @@ -136,6 +138,6 @@ private Filter buildFilters(@Nonnull final ListQueriesInput input) { } criteria.setAnd(andConditions); - return buildFilter(Collections.emptyList(), ImmutableList.of(criteria)); + return buildFilter(Collections.emptyList(), ImmutableList.of(criteria), aspectRetriever); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java index 202c78a62c9ae..0181877864390 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java @@ -26,6 +26,7 @@ import com.linkedin.metadata.service.ViewService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.context.OperationContext; import io.opentelemetry.extension.annotations.WithSpan; import java.net.URISyntaxException; import java.util.Collections; @@ -33,6 +34,7 @@ import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; +import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -62,7 +64,7 @@ public CompletableFuture get(DataFetchingEnvironment List modules = _recommendationsService.listRecommendations( context.getOperationContext(), - mapRequestContext(input.getRequestContext()), + mapRequestContext(context.getOperationContext(), input.getRequestContext()), viewFilter(context.getOperationContext(), _viewService, input.getViewUrn()), input.getLimit()); return ListRecommendationsResult.builder() @@ -83,7 +85,7 @@ public CompletableFuture 
get(DataFetchingEnvironment } private com.linkedin.metadata.recommendation.RecommendationRequestContext mapRequestContext( - RecommendationRequestContext requestContext) { + @Nonnull OperationContext opContext, RecommendationRequestContext requestContext) { com.linkedin.metadata.recommendation.ScenarioType mappedScenarioType; try { mappedScenarioType = @@ -103,7 +105,9 @@ private com.linkedin.metadata.recommendation.RecommendationRequestContext mapReq searchRequestContext.setFilters( new CriterionArray( requestContext.getSearchRequestContext().getFilters().stream() - .map(facetField -> criterionFromFilter(facetField)) + .map( + facetField -> + criterionFromFilter(facetField, opContext.getAspectRetriever())) .collect(Collectors.toList()))); } mappedRequestContext.setSearchRequestContext(searchRequestContext); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java index 10a09b95bfd6e..04a72b14eeb02 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java @@ -63,7 +63,9 @@ public CompletableFuture get(DataFetchingEnvironment environme UrnUtils.getUrn(input.getViewUrn())) : null; - final Filter inputFilter = ResolverUtils.buildFilter(null, input.getOrFilters()); + final Filter inputFilter = + ResolverUtils.buildFilter( + null, input.getOrFilters(), context.getOperationContext().getAspectRetriever()); final SearchFlags searchFlags = mapInputFlags(context, input.getSearchFlags()); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteResolver.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteResolver.java index 13861c94ba336..79792940ef27f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteResolver.java @@ -3,6 +3,7 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; import static org.apache.commons.lang3.StringUtils.isBlank; +import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.ValidationException; import com.linkedin.datahub.graphql.generated.AutoCompleteInput; @@ -39,6 +40,7 @@ public AutoCompleteResolver(@Nonnull final List> sear @Override public CompletableFuture get(DataFetchingEnvironment environment) { + final QueryContext context = environment.getContext(); final AutoCompleteInput input = bindArgument(environment.getArgument("input"), AutoCompleteInput.class); @@ -49,7 +51,11 @@ public CompletableFuture get(DataFetchingEnvironment enviro throw new ValidationException("'query' parameter can not be null or empty"); } - final Filter filter = ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()); + final Filter filter = + ResolverUtils.buildFilter( + input.getFilters(), + input.getOrFilters(), + context.getOperationContext().getRetrieverContext().orElseThrow().getAspectRetriever()); final int limit = input.getLimit() != null ? 
input.getLimit() : DEFAULT_LIMIT; return GraphQLConcurrencyUtils.supplyAsync( () -> { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutocompleteUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutocompleteUtils.java index c849e3ad3f68c..5b5888b89b241 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutocompleteUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutocompleteUtils.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.search; +import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.AutoCompleteMultipleInput; import com.linkedin.datahub.graphql.generated.AutoCompleteMultipleResults; @@ -33,6 +34,7 @@ public static CompletableFuture batchGetAutocomplet DataFetchingEnvironment environment, @Nullable DataHubViewInfo view) { final int limit = input.getLimit() != null ? input.getLimit() : DEFAULT_LIMIT; + final QueryContext context = environment.getContext(); final List> autoCompletesFuture = entities.stream() @@ -41,7 +43,10 @@ public static CompletableFuture batchGetAutocomplet GraphQLConcurrencyUtils.supplyAsync( () -> { final Filter filter = - ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()); + ResolverUtils.buildFilter( + input.getFilters(), + input.getOrFilters(), + context.getOperationContext().getAspectRetriever()); final Filter finalFilter = view != null ? 
SearchUtils.combineFilters( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java index 77eef1b9a25c6..8b8b93353bc6e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java @@ -72,7 +72,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) UrnUtils.getUrn(input.getViewUrn())) : null; - final Filter baseFilter = ResolverUtils.buildFilter(null, input.getOrFilters()); + final Filter baseFilter = + ResolverUtils.buildFilter( + null, input.getOrFilters(), context.getOperationContext().getAspectRetriever()); final SearchFlags searchFlags; com.linkedin.datahub.graphql.generated.SearchFlags inputFlags = input.getSearchFlags(); if (inputFlags != null) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java index addd217d68724..14b2d3b8f8420 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java @@ -129,7 +129,10 @@ public CompletableFuture get(DataFetchingEnvironment entityNames, sanitizedQuery, maxHops, - ResolverUtils.buildFilter(facetFilters, input.getOrFilters()), + ResolverUtils.buildFilter( + facetFilters, + input.getOrFilters(), + context.getOperationContext().getAspectRetriever()), null, scrollId, keepAlive, diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java index a9da1c4055434..287e339ddee50 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java @@ -59,7 +59,10 @@ public CompletableFuture get(DataFetchingEnvironment environment) : null; final Filter baseFilter = - ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()); + ResolverUtils.buildFilter( + input.getFilters(), + input.getOrFilters(), + context.getOperationContext().getAspectRetriever()); SearchFlags searchFlags = mapInputFlags(context, input.getSearchFlags()); SortCriterion sortCriterion = diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java index 238f2375ee207..f342d251acd72 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java @@ -138,7 +138,10 @@ public CompletableFuture get(DataFetchingEnvironment count); final Filter filter = - ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()); + ResolverUtils.buildFilter( + input.getFilters(), + input.getOrFilters(), + context.getOperationContext().getAspectRetriever()); final SearchFlags searchFlags; com.linkedin.datahub.graphql.generated.SearchFlags inputFlags = input.getSearchFlags(); if (inputFlags != null) { diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java index 35586ea29571c..5fb2f8f14b293 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java @@ -85,7 +85,10 @@ public CompletableFuture get(DataFetchingEnvironment environment) context.getOperationContext().withSearchFlags(flags -> searchFlags), entityName, sanitizedQuery, - ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()), + ResolverUtils.buildFilter( + input.getFilters(), + input.getOrFilters(), + context.getOperationContext().getAspectRetriever()), null, start, count)); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolver.java index 49c3467adb58b..7c3e433dd1ede 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolver.java @@ -49,7 +49,9 @@ public CompletableFuture get(final DataFetchingEnvironment environm DataHubViewType.valueOf(input.getViewType().toString()), input.getName(), input.getDescription(), - ViewUtils.mapDefinition(input.getDefinition()), + ViewUtils.mapDefinition( + input.getDefinition(), + context.getOperationContext().getAspectRetriever()), System.currentTimeMillis()); return createView(urn, input); } catch (Exception e) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java index 28b8fe50b70d6..952e55ca117f2 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java @@ -16,6 +16,7 @@ import com.linkedin.datahub.graphql.generated.ListViewsResult; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; @@ -30,6 +31,7 @@ import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; /** Resolver used for listing global DataHub Views. 
*/ @@ -71,7 +73,7 @@ public CompletableFuture get(final DataFetchingEnvironment envi context.getOperationContext().withSearchFlags(flags -> flags.setFulltext(true)), Constants.DATAHUB_VIEW_ENTITY_NAME, query, - buildFilters(), + buildFilters(context.getOperationContext().getAspectRetriever()), DEFAULT_SORT_CRITERION, start, count); @@ -107,7 +109,7 @@ private List mapUnresolvedViews(final List entityUrns) { return results; } - private Filter buildFilters() { + private Filter buildFilters(@Nullable AspectRetriever aspectRetriever) { final AndFilterInput globalCriteria = new AndFilterInput(); List andConditions = new ArrayList<>(); andConditions.add( @@ -118,6 +120,6 @@ private Filter buildFilters() { false, FilterOperator.EQUAL)); globalCriteria.setAnd(andConditions); - return buildFilter(Collections.emptyList(), ImmutableList.of(globalCriteria)); + return buildFilter(Collections.emptyList(), ImmutableList.of(globalCriteria), aspectRetriever); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java index 218bedcd0beff..32eb0e46bb616 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java @@ -15,6 +15,7 @@ import com.linkedin.datahub.graphql.generated.ListViewsResult; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; @@ -74,7 +75,10 @@ public CompletableFuture get(final DataFetchingEnvironment envi context.getOperationContext().withSearchFlags(flags -> flags.setFulltext(true)), 
Constants.DATAHUB_VIEW_ENTITY_NAME, query, - buildFilters(viewType, context.getActorUrn()), + buildFilters( + viewType, + context.getActorUrn(), + context.getOperationContext().getAspectRetriever()), DEFAULT_SORT_CRITERION, start, count); @@ -110,7 +114,10 @@ private List mapUnresolvedViews(final List entityUrns) { return results; } - private Filter buildFilters(@Nullable final String viewType, final String creatorUrn) { + private Filter buildFilters( + @Nullable final String viewType, + final String creatorUrn, + @Nullable AspectRetriever aspectRetriever) { // And GLOBAL views for the authenticated actor. final AndFilterInput filterCriteria = new AndFilterInput(); final List andConditions = new ArrayList<>(); @@ -125,6 +132,6 @@ private Filter buildFilters(@Nullable final String viewType, final String creato filterCriteria.setAnd(andConditions); // Currently, there is no way to fetch the views belonging to another user. - return buildFilter(Collections.emptyList(), ImmutableList.of(filterCriteria)); + return buildFilter(Collections.emptyList(), ImmutableList.of(filterCriteria), aspectRetriever); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolver.java index 9ab5efe83105a..11ec1c5705bd3 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolver.java @@ -48,7 +48,8 @@ public CompletableFuture get(final DataFetchingEnvironment environm urn, input.getName(), input.getDescription(), - ViewUtils.mapDefinition(input.getDefinition()), + ViewUtils.mapDefinition( + input.getDefinition(), context.getOperationContext().getAspectRetriever()), System.currentTimeMillis()); log.info(String.format("Successfully updated View %s with urn", urn)); return 
getView(context, urn, context.getAuthentication()); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtils.java index 29ceba2f1b86c..70a5ced4bfbf1 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtils.java @@ -13,6 +13,7 @@ import com.linkedin.datahub.graphql.generated.LogicalOperator; import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; import com.linkedin.metadata.query.filter.CriterionArray; @@ -25,6 +26,7 @@ import java.util.Objects; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import javax.annotation.Nullable; public class ViewUtils { @@ -92,12 +94,12 @@ public static boolean canUpdateView( */ @Nonnull public static DataHubViewDefinition mapDefinition( - @Nonnull final DataHubViewDefinitionInput input) { + @Nonnull final DataHubViewDefinitionInput input, @Nullable AspectRetriever aspectRetriever) { Objects.requireNonNull(input, "input must not be null"); final DataHubViewDefinition result = new DataHubViewDefinition(); if (input.getFilter() != null) { - result.setFilter(mapFilter(input.getFilter()), SetMode.IGNORE_NULL); + result.setFilter(mapFilter(input.getFilter(), aspectRetriever), SetMode.IGNORE_NULL); } result.setEntityTypes( new StringArray( @@ -118,17 +120,19 @@ public static DataHubViewDefinition mapDefinition( * which cannot be rendered in full by the UI. We account for this on the read path by logging a * warning and returning an empty View in such cases. 
*/ - private static Filter mapFilter(@Nonnull DataHubViewFilterInput input) { + private static Filter mapFilter( + @Nonnull DataHubViewFilterInput input, @Nullable AspectRetriever aspectRetriever) { if (LogicalOperator.AND.equals(input.getOperator())) { // AND - return buildAndFilter(input.getFilters()); + return buildAndFilter(input.getFilters(), aspectRetriever); } else { // OR - return buildOrFilter(input.getFilters()); + return buildOrFilter(input.getFilters(), aspectRetriever); } } - private static Filter buildAndFilter(@Nonnull List input) { + private static Filter buildAndFilter( + @Nonnull List input, @Nullable AspectRetriever aspectRetriever) { final Filter result = new Filter(); result.setOr( new ConjunctiveCriterionArray( @@ -137,12 +141,13 @@ private static Filter buildAndFilter(@Nonnull List input) { .setAnd( new CriterionArray( input.stream() - .map(ResolverUtils::criterionFromFilter) + .map(f -> ResolverUtils.criterionFromFilter(f, aspectRetriever)) .collect(Collectors.toList())))))); return result; } - private static Filter buildOrFilter(@Nonnull List input) { + private static Filter buildOrFilter( + @Nonnull List input, @Nullable AspectRetriever aspectRetriever) { final Filter result = new Filter(); result.setOr( new ConjunctiveCriterionArray( @@ -152,7 +157,9 @@ private static Filter buildOrFilter(@Nonnull List input) { new ConjunctiveCriterion() .setAnd( new CriterionArray( - ImmutableList.of(ResolverUtils.criterionFromFilter(filter))))) + ImmutableList.of( + ResolverUtils.criterionFromFilter( + filter, aspectRetriever))))) .collect(Collectors.toList()))); return result; } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java index 57d85e5b204c2..f98284e92ede5 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java +++ 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; +import static org.mockito.Mockito.mock; import static org.testng.AssertJUnit.assertEquals; import com.google.common.collect.ImmutableList; @@ -11,6 +12,7 @@ import com.linkedin.datahub.graphql.TestUtils; import com.linkedin.datahub.graphql.generated.FacetFilterInput; import com.linkedin.datahub.graphql.generated.FilterOperator; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -28,7 +30,7 @@ public class ResolverUtilsTest { @Test public void testCriterionFromFilter() throws Exception { - final DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + final DataFetchingEnvironment mockEnv = mock(DataFetchingEnvironment.class); final QueryContext mockAllowContext = TestUtils.getMockAllowContext(); Mockito.when(mockEnv.getContext()).thenReturn(mockAllowContext); @@ -40,7 +42,8 @@ public void testCriterionFromFilter() throws Exception { null, ImmutableList.of("urn:li:tag:abc", "urn:li:tag:def"), false, - FilterOperator.EQUAL)); + FilterOperator.EQUAL), + mock(AspectRetriever.class)); assertEquals( valuesCriterion, new Criterion() @@ -53,7 +56,8 @@ public void testCriterionFromFilter() throws Exception { // this is the legacy pathway Criterion valueCriterion = criterionFromFilter( - new FacetFilterInput("tags", "urn:li:tag:abc", null, true, FilterOperator.EQUAL)); + new FacetFilterInput("tags", "urn:li:tag:abc", null, true, FilterOperator.EQUAL), + mock(AspectRetriever.class)); assertEquals( valueCriterion, new Criterion() @@ -66,7 +70,9 @@ public void testCriterionFromFilter() throws Exception { // check that both being null 
doesn't cause a NPE. this should never happen except via API // interaction Criterion doubleNullCriterion = - criterionFromFilter(new FacetFilterInput("tags", null, null, true, FilterOperator.EQUAL)); + criterionFromFilter( + new FacetFilterInput("tags", null, null, true, FilterOperator.EQUAL), + mock(AspectRetriever.class)); assertEquals( doubleNullCriterion, new Criterion() diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolverTest.java index 845ce1e6129d8..f6e7e7267a060 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolverTest.java @@ -20,6 +20,7 @@ import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.SystemMetadata; import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.context.OperationContext; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -56,7 +57,7 @@ public void testGetSuccess() throws Exception { Mockito.eq(5), Mockito.eq( AssertionRunEventResolver.buildFilter( - null, AssertionRunStatus.COMPLETE.toString())))) + null, AssertionRunStatus.COMPLETE.toString(), null)))) .thenReturn( ImmutableList.of( new EnvelopedAspect() @@ -68,6 +69,9 @@ public void testGetSuccess() throws Exception { // Execute resolver QueryContext mockContext = Mockito.mock(QueryContext.class); Mockito.when(mockContext.getAuthentication()).thenReturn(Mockito.mock(Authentication.class)); + Mockito.when(mockContext.getOperationContext()) + .thenReturn(Mockito.mock(OperationContext.class)); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); Mockito.when(mockEnv.getArgumentOrDefault(Mockito.eq("status"), 
Mockito.eq(null))) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java index ad30e48d8361b..6c876226a45e6 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java @@ -46,7 +46,7 @@ public void testGetSuccess() throws Exception { any(), Mockito.eq(Constants.ACCESS_TOKEN_ENTITY_NAME), Mockito.eq(""), - Mockito.eq(buildFilter(filters, Collections.emptyList())), + Mockito.eq(buildFilter(filters, Collections.emptyList(), null)), Mockito.any(SortCriterion.class), Mockito.eq(input.getStart()), Mockito.eq(input.getCount()))) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java index 4897d0819b59f..9cf7e62e65e25 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java @@ -2,6 +2,7 @@ import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; @@ -17,6 +18,7 @@ import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.datahub.graphql.resolvers.chart.BrowseV2Resolver; import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.browse.BrowseResultGroupV2; import 
com.linkedin.metadata.browse.BrowseResultGroupV2Array; import com.linkedin.metadata.browse.BrowseResultMetadata; @@ -100,7 +102,7 @@ public static void testBrowseV2SuccessWithQueryAndFilter() throws Exception { facetFilterInput.setValues(ImmutableList.of("urn:li:corpuser:test")); andFilterInput.setAnd(ImmutableList.of(facetFilterInput)); orFilters.add(andFilterInput); - Filter filter = ResolverUtils.buildFilter(null, orFilters); + Filter filter = ResolverUtils.buildFilter(null, orFilters, mock(AspectRetriever.class)); EntityClient mockClient = initMockEntityClient( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java index 9ed1d5001b75c..70b427a1606f1 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java @@ -169,6 +169,6 @@ private Filter buildFilter(@Nullable QuerySource source, @Nullable String entity FilterOperator.EQUAL)); } criteria.setAnd(andConditions); - return ResolverUtils.buildFilter(Collections.emptyList(), ImmutableList.of(criteria)); + return ResolverUtils.buildFilter(Collections.emptyList(), ImmutableList.of(criteria), null); } } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java index 443050456f3fd..701ddd84c173e 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java @@ -2,6 +2,7 @@ import static com.linkedin.datahub.graphql.TestUtils.*; import static 
org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; @@ -17,6 +18,7 @@ import com.linkedin.datahub.graphql.generated.FilterOperator; import com.linkedin.datahub.graphql.generated.LogicalOperator; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -169,7 +171,7 @@ public void testMapDefinition() throws Exception { // the keyword mapping. .setCondition(Condition.CONTAIN)))))))); - assertEquals(ViewUtils.mapDefinition(input), expectedResult); + assertEquals(ViewUtils.mapDefinition(input, mock(AspectRetriever.class)), expectedResult); } private static ViewService initViewService(DataHubViewType viewType) { diff --git a/datahub-upgrade/build.gradle b/datahub-upgrade/build.gradle index 9108f3009b4ba..4b46996d30685 100644 --- a/datahub-upgrade/build.gradle +++ b/datahub-upgrade/build.gradle @@ -91,6 +91,27 @@ bootJar { archiveFileName = "${project.name}.jar" } +bootRun { + environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml" + environment "ENABLE_STRUCTURED_PROPERTIES_SYSTEM_UPDATE", "true" + environment "ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX", "true" + environment "SERVER_PORT", "8083" + args += ["-u", "SystemUpdate"] +} + +/** + * Runs SystemUpdate on locally running system + */ +task run(type: Exec) { + dependsOn bootJar + group = "Execution" + description = "Run the datahub-upgrade SystemUpdate process locally." 
+ environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml" + environment "ENABLE_STRUCTURED_PROPERTIES_SYSTEM_UPDATE", "true" + environment "ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX", "true" + commandLine "java", "-jar", "-Dserver.port=8083", bootJar.getArchiveFile().get(), "-u", "SystemUpdate" +} + docker { name "${docker_registry}/${docker_repo}:v${version}" version "v${version}" diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BuildIndicesConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BuildIndicesConfig.java index 3510fa513b3b9..e0de8a7255d61 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BuildIndicesConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BuildIndicesConfig.java @@ -6,7 +6,6 @@ import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.graph.GraphService; -import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.systemmetadata.SystemMetadataService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; @@ -26,8 +25,7 @@ public BlockingSystemUpgrade buildIndices( final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents baseElasticSearchComponents, final ConfigurationProvider configurationProvider, - final AspectDao aspectDao, - final EntityRegistry entityRegistry) { + final AspectDao aspectDao) { return new BuildIndices( systemMetadataService, @@ -36,7 +34,6 @@ public BlockingSystemUpgrade buildIndices( graphService, baseElasticSearchComponents, configurationProvider, - aspectDao, - entityRegistry); + aspectDao); } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/CleanIndicesConfig.java 
b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/CleanIndicesConfig.java index 4f54b01459625..7559aaf3f3cdb 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/CleanIndicesConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/CleanIndicesConfig.java @@ -4,6 +4,7 @@ import com.linkedin.datahub.upgrade.system.elasticsearch.CleanIndices; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; +import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.systemmetadata.SystemMetadataService; @@ -23,7 +24,8 @@ public NonBlockingSystemUpgrade cleanIndices( final GraphService graphService, final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents baseElasticSearchComponents, - final ConfigurationProvider configurationProvider) { + final ConfigurationProvider configurationProvider, + final AspectDao aspectDao) { return new CleanIndices( systemMetadataService, @@ -31,6 +33,7 @@ public NonBlockingSystemUpgrade cleanIndices( entitySearchService, graphService, baseElasticSearchComponents, - configurationProvider); + configurationProvider, + aspectDao); } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/BuildIndices.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/BuildIndices.java index fea0479876a2e..a91bba2fa0a97 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/BuildIndices.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/BuildIndices.java @@ -1,5 +1,13 @@ package com.linkedin.datahub.upgrade.system.elasticsearch; +import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; +import static 
com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; + +import com.datahub.util.RecordUtils; +import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.system.BlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.elasticsearch.steps.BuildIndicesPostStep; @@ -8,14 +16,17 @@ import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.graph.GraphService; -import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.shared.ElasticSearchIndexed; import com.linkedin.metadata.systemmetadata.SystemMetadataService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; +import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.util.ArrayList; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -31,8 +42,7 @@ public BuildIndices( final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents baseElasticSearchComponents, final ConfigurationProvider configurationProvider, - final AspectDao aspectDao, - final EntityRegistry entityRegistry) { + final AspectDao aspectDao) { List indexedServices = Stream.of(graphService, entitySearchService, systemMetadataService, timeseriesAspectService) @@ -41,12 +51,7 @@ public BuildIndices( .collect(Collectors.toList()); _steps = - buildSteps( - indexedServices, - baseElasticSearchComponents, - configurationProvider, - aspectDao, - entityRegistry); + buildSteps(indexedServices, 
baseElasticSearchComponents, configurationProvider, aspectDao); } @Override @@ -64,8 +69,14 @@ private List buildSteps( final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents baseElasticSearchComponents, final ConfigurationProvider configurationProvider, - final AspectDao aspectDao, - final EntityRegistry entityRegistry) { + final AspectDao aspectDao) { + + final Set> structuredProperties; + if (configurationProvider.getStructuredProperties().isSystemUpdateEnabled()) { + structuredProperties = getActiveStructuredPropertiesDefinitions(aspectDao); + } else { + structuredProperties = Set.of(); + } final List steps = new ArrayList<>(); // Disable ES write mode/change refresh rate and clone indices @@ -74,13 +85,47 @@ private List buildSteps( baseElasticSearchComponents, indexedServices, configurationProvider, - aspectDao, - entityRegistry)); + structuredProperties)); // Configure graphService, entitySearchService, systemMetadataService, timeseriesAspectService - steps.add(new BuildIndicesStep(indexedServices)); + steps.add(new BuildIndicesStep(indexedServices, structuredProperties)); // Reset configuration (and delete clones? Or just do this regularly? Or delete clone in // pre-configure step if it already exists? 
- steps.add(new BuildIndicesPostStep(baseElasticSearchComponents, indexedServices)); + steps.add( + new BuildIndicesPostStep( + baseElasticSearchComponents, indexedServices, structuredProperties)); return steps; } + + static Set> getActiveStructuredPropertiesDefinitions( + AspectDao aspectDao) { + Set removedStructuredPropertyUrns; + try (Stream stream = + aspectDao.streamAspects(STRUCTURED_PROPERTY_ENTITY_NAME, STATUS_ASPECT_NAME)) { + removedStructuredPropertyUrns = + stream + .map( + entityAspect -> + Pair.of( + entityAspect.getUrn(), + RecordUtils.toRecordTemplate(Status.class, entityAspect.getMetadata()))) + .filter(status -> status.getSecond().isRemoved()) + .map(Pair::getFirst) + .collect(Collectors.toSet()); + } + + try (Stream stream = + aspectDao.streamAspects( + STRUCTURED_PROPERTY_ENTITY_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) { + return stream + .map( + entityAspect -> + Pair.of( + UrnUtils.getUrn(entityAspect.getUrn()), + RecordUtils.toRecordTemplate( + StructuredPropertyDefinition.class, entityAspect.getMetadata()))) + .filter( + definition -> !removedStructuredPropertyUrns.contains(definition.getKey().toString())) + .collect(Collectors.toSet()); + } + } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/CleanIndices.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/CleanIndices.java index e316481e2b07e..96aea906b021e 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/CleanIndices.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/CleanIndices.java @@ -1,16 +1,23 @@ package com.linkedin.datahub.upgrade.system.elasticsearch; +import static com.linkedin.datahub.upgrade.system.elasticsearch.BuildIndices.getActiveStructuredPropertiesDefinitions; + +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.upgrade.UpgradeStep; import 
com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.elasticsearch.steps.CleanIndicesStep; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; +import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.shared.ElasticSearchIndexed; import com.linkedin.metadata.systemmetadata.SystemMetadataService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; +import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; import lombok.extern.slf4j.Slf4j; @@ -26,7 +33,15 @@ public CleanIndices( final GraphService graphService, final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents baseElasticSearchComponents, - final ConfigurationProvider configurationProvider) { + final ConfigurationProvider configurationProvider, + final AspectDao aspectDao) { + + final Set> structuredProperties; + if (configurationProvider.getStructuredProperties().isSystemUpdateEnabled()) { + structuredProperties = getActiveStructuredPropertiesDefinitions(aspectDao); + } else { + structuredProperties = Set.of(); + } List indexedServices = Stream.of(graphService, entitySearchService, systemMetadataService, timeseriesAspectService) @@ -39,7 +54,8 @@ public CleanIndices( new CleanIndicesStep( baseElasticSearchComponents.getSearchClient(), configurationProvider.getElasticSearch(), - indexedServices)); + indexedServices, + structuredProperties)); } @Override diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPostStep.java 
b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPostStep.java index a44f6d6487067..09f65c8448027 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPostStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPostStep.java @@ -4,6 +4,7 @@ import static com.linkedin.datahub.upgrade.system.elasticsearch.util.IndexUtils.getAllReindexConfigs; import com.google.common.collect.ImmutableMap; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.upgrade.UpgradeContext; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.UpgradeStepResult; @@ -12,8 +13,11 @@ import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ReindexConfig; import com.linkedin.metadata.shared.ElasticSearchIndexed; +import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; @@ -25,8 +29,9 @@ @Slf4j public class BuildIndicesPostStep implements UpgradeStep { - private final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents _esComponents; - private final List _services; + private final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents esComponents; + private final List services; + private final Set> structuredProperties; @Override public String id() { @@ -44,7 +49,7 @@ public Function executable() { try { List indexConfigs = - getAllReindexConfigs(_services).stream() + getAllReindexConfigs(services, structuredProperties).stream() .filter(ReindexConfig::requiresReindex) .collect(Collectors.toList()); @@ -55,7 +60,7 @@ public Function executable() { request.settings(indexSettings); 
boolean ack = - _esComponents + esComponents .getSearchClient() .indices() .putSettings(request, RequestOptions.DEFAULT) @@ -69,7 +74,7 @@ public Function executable() { if (ack) { ack = IndexUtils.validateWriteBlock( - _esComponents.getSearchClient(), indexConfig.name(), false); + esComponents.getSearchClient(), indexConfig.name(), false); log.info( "Validated index {} with new settings. Settings: {}, Acknowledged: {}", indexConfig.name(), diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java index c3c9981b1dd7e..983e7f0c97f38 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java @@ -2,13 +2,9 @@ import static com.linkedin.datahub.upgrade.system.elasticsearch.util.IndexUtils.INDEX_BLOCKS_WRITE_SETTING; import static com.linkedin.datahub.upgrade.system.elasticsearch.util.IndexUtils.getAllReindexConfigs; -import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; -import com.datahub.util.RecordUtils; import com.google.common.collect.ImmutableMap; -import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.upgrade.UpgradeContext; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.UpgradeStepResult; @@ -16,9 +12,6 @@ import com.linkedin.datahub.upgrade.system.elasticsearch.util.IndexUtils; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; -import 
com.linkedin.metadata.entity.AspectDao; -import com.linkedin.metadata.entity.EntityAspect; -import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ReindexConfig; import com.linkedin.metadata.shared.ElasticSearchIndexed; import com.linkedin.structured.StructuredPropertyDefinition; @@ -29,7 +22,6 @@ import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; -import java.util.stream.Stream; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.opensearch.OpenSearchStatusException; @@ -40,11 +32,10 @@ @RequiredArgsConstructor @Slf4j public class BuildIndicesPreStep implements UpgradeStep { - private final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents _esComponents; - private final List _services; - private final ConfigurationProvider _configurationProvider; - private final AspectDao _aspectDao; - private final EntityRegistry _entityRegistry; + private final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents esComponents; + private final List services; + private final ConfigurationProvider configurationProvider; + private final Set> structuredProperties; @Override public String id() { @@ -60,13 +51,8 @@ public int retryCount() { public Function executable() { return (context) -> { try { - final List reindexConfigs; - if (_configurationProvider.getStructuredProperties().isSystemUpdateEnabled()) { - reindexConfigs = - getAllReindexConfigs(_services, getActiveStructuredPropertiesDefinitions(_aspectDao)); - } else { - reindexConfigs = getAllReindexConfigs(_services); - } + final List reindexConfigs = + getAllReindexConfigs(services, structuredProperties); // Get indices to update List indexConfigs = @@ -76,7 +62,7 @@ public Function executable() { for (ReindexConfig indexConfig : indexConfigs) { String indexName = - IndexUtils.resolveAlias(_esComponents.getSearchClient(), indexConfig.name()); + 
IndexUtils.resolveAlias(esComponents.getSearchClient(), indexConfig.name()); boolean ack = blockWrites(indexName); if (!ack) { @@ -87,11 +73,11 @@ public Function executable() { } // Clone indices - if (_configurationProvider.getElasticSearch().getBuildIndices().isCloneIndices()) { + if (configurationProvider.getElasticSearch().getBuildIndices().isCloneIndices()) { String clonedName = indexConfig.name() + "_clone_" + System.currentTimeMillis(); ResizeRequest resizeRequest = new ResizeRequest(clonedName, indexName); boolean cloneAck = - _esComponents + esComponents .getSearchClient() .indices() .clone(resizeRequest, RequestOptions.DEFAULT) @@ -121,7 +107,7 @@ private boolean blockWrites(String indexName) throws InterruptedException, IOExc boolean ack; try { ack = - _esComponents + esComponents .getSearchClient() .indices() .putSettings(request, RequestOptions.DEFAULT) @@ -145,7 +131,7 @@ private boolean blockWrites(String indexName) throws InterruptedException, IOExc } if (ack) { - ack = IndexUtils.validateWriteBlock(_esComponents.getSearchClient(), indexName, true); + ack = IndexUtils.validateWriteBlock(esComponents.getSearchClient(), indexName, true); log.info( "Validated index {} with new settings. 
Settings: {}, Acknowledged: {}", indexName, @@ -155,37 +141,4 @@ private boolean blockWrites(String indexName) throws InterruptedException, IOExc return ack; } - - private static Set getActiveStructuredPropertiesDefinitions( - AspectDao aspectDao) { - Set removedStructuredPropertyUrns; - try (Stream stream = - aspectDao.streamAspects(STRUCTURED_PROPERTY_ENTITY_NAME, STATUS_ASPECT_NAME)) { - removedStructuredPropertyUrns = - stream - .map( - entityAspect -> - Pair.of( - entityAspect.getUrn(), - RecordUtils.toRecordTemplate(Status.class, entityAspect.getMetadata()))) - .filter(status -> status.getSecond().isRemoved()) - .map(Pair::getFirst) - .collect(Collectors.toSet()); - } - - try (Stream stream = - aspectDao.streamAspects( - STRUCTURED_PROPERTY_ENTITY_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) { - return stream - .map( - entityAspect -> - Pair.of( - entityAspect.getUrn(), - RecordUtils.toRecordTemplate( - StructuredPropertyDefinition.class, entityAspect.getMetadata()))) - .filter(definition -> !removedStructuredPropertyUrns.contains(definition.getKey())) - .map(Pair::getSecond) - .collect(Collectors.toSet()); - } - } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesStep.java index d37ee173bd9af..5cf370162a312 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesStep.java @@ -1,11 +1,15 @@ package com.linkedin.datahub.upgrade.system.elasticsearch.steps; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.upgrade.UpgradeContext; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.UpgradeStepResult; import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult; import 
com.linkedin.metadata.shared.ElasticSearchIndexed; +import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.util.List; +import java.util.Set; import java.util.function.Function; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -14,7 +18,8 @@ @RequiredArgsConstructor public class BuildIndicesStep implements UpgradeStep { - private final List _services; + private final List services; + private final Set> structuredProperties; @Override public String id() { @@ -30,8 +35,8 @@ public int retryCount() { public Function executable() { return (context) -> { try { - for (ElasticSearchIndexed service : _services) { - service.reindexAll(); + for (ElasticSearchIndexed service : services) { + service.reindexAll(structuredProperties); } } catch (Exception e) { log.error("BuildIndicesStep failed.", e); diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java index c3a4d8ab89c07..fd5592c4ead25 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.upgrade.system.elasticsearch.steps; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.upgrade.UpgradeContext; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.UpgradeStepResult; @@ -8,7 +9,10 @@ import com.linkedin.metadata.config.search.ElasticSearchConfiguration; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; import com.linkedin.metadata.shared.ElasticSearchIndexed; +import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.util.List; +import 
java.util.Set; import java.util.function.Function; import lombok.extern.slf4j.Slf4j; import org.opensearch.client.RestHighLevelClient; @@ -18,14 +22,17 @@ public class CleanIndicesStep implements UpgradeStep { private final RestHighLevelClient searchClient; private final ElasticSearchConfiguration esConfig; private final List indexedServices; + private final Set> structuredProperties; public CleanIndicesStep( final RestHighLevelClient searchClient, final ElasticSearchConfiguration esConfig, - final List indexedServices) { + final List indexedServices, + final Set> structuredProperties) { this.searchClient = searchClient; this.esConfig = esConfig; this.indexedServices = indexedServices; + this.structuredProperties = structuredProperties; } @Override @@ -42,7 +49,7 @@ public int retryCount() { public Function executable() { return (context) -> { try { - IndexUtils.getAllReindexConfigs(indexedServices) + IndexUtils.getAllReindexConfigs(indexedServices, structuredProperties) .forEach( reindexConfig -> ESIndexBuilder.cleanIndex(searchClient, esConfig, reindexConfig)); } catch (Exception e) { diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/util/IndexUtils.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/util/IndexUtils.java index 52b34200991c3..99d72776ff788 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/util/IndexUtils.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/util/IndexUtils.java @@ -1,8 +1,10 @@ package com.linkedin.datahub.upgrade.system.elasticsearch.util; +import com.linkedin.common.urn.Urn; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ReindexConfig; import com.linkedin.metadata.shared.ElasticSearchIndexed; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.io.IOException; import java.util.ArrayList; import 
java.util.Collection; @@ -27,30 +29,15 @@ private IndexUtils() {} private static List _reindexConfigs = new ArrayList<>(); - public static List getAllReindexConfigs( - List elasticSearchIndexedList) throws IOException { - // Avoid locking & reprocessing - List reindexConfigs = new ArrayList<>(_reindexConfigs); - if (reindexConfigs.isEmpty()) { - for (ElasticSearchIndexed elasticSearchIndexed : elasticSearchIndexedList) { - reindexConfigs.addAll(elasticSearchIndexed.buildReindexConfigs()); - } - _reindexConfigs = new ArrayList<>(reindexConfigs); - } - - return reindexConfigs; - } - public static List getAllReindexConfigs( List elasticSearchIndexedList, - Collection structuredProperties) + Collection> structuredProperties) throws IOException { // Avoid locking & reprocessing List reindexConfigs = new ArrayList<>(_reindexConfigs); if (reindexConfigs.isEmpty()) { for (ElasticSearchIndexed elasticSearchIndexed : elasticSearchIndexedList) { - reindexConfigs.addAll( - elasticSearchIndexed.buildReindexConfigsWithAllStructProps(structuredProperties)); + reindexConfigs.addAll(elasticSearchIndexed.buildReindexConfigs(structuredProperties)); } _reindexConfigs = new ArrayList<>(reindexConfigs); } diff --git a/docs/api/tutorials/structured-properties.md b/docs/api/tutorials/structured-properties.md index b4363141f630b..4f830f25a8ec3 100644 --- a/docs/api/tutorials/structured-properties.md +++ b/docs/api/tutorials/structured-properties.md @@ -16,9 +16,11 @@ Learn more about structured properties in the [Structured Properties Feature Gui This guide will show you how to execute the following actions with structured properties. 
- Create structured properties - Read structured properties -- Delete structured properties (soft delete) +- Delete structured properties - Add structured properties to a dataset - Patch structured properties (add / remove / update a single property) +- Update structured property with breaking schema changes +- Search using structured properties ## Prerequisites @@ -87,9 +89,9 @@ datahub properties upsert -f {properties_yaml} If successful, you should see `Created structured property urn:li:structuredProperty:...` - + -```commandline +```shell curl -X 'POST' -v \ 'http://localhost:8080/openapi/v2/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/propertyDefinition' \ -H 'accept: application/json' \ @@ -120,6 +122,83 @@ curl -X 'POST' -v \ ] }' | jq ``` + + + + +```shell +curl -X 'POST' -v \ + 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/propertyDefinition' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "qualifiedName": "io.acryl.privacy.retentionTime", + "valueType": "urn:li:dataType:datahub.number", + "description": "Retention Time is used to figure out how long to retain records in a dataset", + "displayName": "Retention Time", + "cardinality": "MULTIPLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "allowedValues": [ + { + "value": {"double": 30}, + "description": "30 days, usually reserved for datasets that are ephemeral and contain pii" + }, + { + "value": {"double": 60}, + "description": "Use this for datasets that drive monthly reporting but contain pii" + }, + { + "value": {"double": 365}, + "description": "Use this for non-sensitive data that can be retained for longer" + } + ] +}' | jq +``` + +Example Response: + +```json +{ + "urn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "propertyDefinition": { + "value": { + 
"allowedValues": [ + { + "description": "30 days, usually reserved for datasets that are ephemeral and contain pii", + "value": { + "double": 30 + } + }, + { + "description": "Use this for datasets that drive monthly reporting but contain pii", + "value": { + "double": 60 + } + }, + { + "description": "Use this for non-sensitive data that can be retained for longer", + "value": { + "double": 365 + } + } + ], + "displayName": "Retention Time", + "qualifiedName": "io.acryl.privacy.retentionTime", + "valueType": "urn:li:dataType:datahub.number", + "description": "Retention Time is used to figure out how long to retain records in a dataset", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "cardinality": "MULTIPLE" + } + } +} +``` + @@ -137,7 +216,7 @@ datahub properties get --urn {urn} For example, you can run `datahub properties get --urn urn:li:structuredProperty:io.acryl.privacy.retentionTime`. If successful, you should see metadata about your properties returned. -```commandline +```json { "urn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", "qualified_name": "io.acryl.privacy.retentionTime", @@ -167,7 +246,8 @@ If successful, you should see metadata about your properties returned. 
``` - + + Example Request: ``` @@ -178,7 +258,7 @@ curl -X 'GET' -v \ Example Response: -```commandline +```json { "value": { "allowedValues": [ @@ -214,6 +294,58 @@ Example Response: } ``` + + + + +Example Request: +``` +curl -X 'GET' -v \ + 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/propertyDefinition' \ + -H 'accept: application/json' | jq +``` + +Example Response: + +```json +{ + "urn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "propertyDefinition": { + "value": { + "allowedValues": [ + { + "description": "30 days, usually reserved for datasets that are ephemeral and contain pii", + "value": { + "double": 30 + } + }, + { + "description": "Use this for datasets that drive monthly reporting but contain pii", + "value": { + "double": 60 + } + }, + { + "description": "Use this for non-sensitive data that can be retained for longer", + "value": { + "double": 365 + } + } + ], + "displayName": "Retention Time", + "qualifiedName": "io.acryl.privacy.retentionTime", + "valueType": "urn:li:dataType:datahub.number", + "description": "Retention Time is used to figure out how long to retain records in a dataset", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "cardinality": "MULTIPLE" + } + } +} +``` + @@ -250,12 +382,13 @@ If successful, you should see `Update succeeded for urn:li:dataset:...` - + + Following command will set structured properties `retentionTime` as `60.0` to a dataset `urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)`. Please note that the structured property and the dataset must exist before executing this command. 
(You can create sample datasets using the `datahub docker ingest-sample-data`) -```commandline +```shell curl -X 'POST' -v \ 'http://localhost:8080/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \ -H 'accept: application/json' \ @@ -272,6 +405,50 @@ curl -X 'POST' -v \ }' | jq ``` + + + + +Following command will set structured properties `retentionTime` as `60.0` to a dataset `urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)`. +Please note that the structured property and the dataset must exist before executing this command. (You can create sample datasets using the `datahub docker ingest-sample-data`) + +```shell +curl -X 'POST' -v \ + 'http://localhost:8080/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "values": [ + {"double": 60.0} + ] + } + ] +}' | jq +``` +Example Response: + +```json +{ + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "structuredProperties": { + "value": { + "properties": [ + { + "values": [ + { + "double": 60 + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime" + } + ] + } + } +} +``` @@ -299,11 +476,11 @@ For this example, we'll extend create a second structured property and apply bot After this your system should include both `io.acryl.privacy.retentionTime` and `io.acryl.privacy.retentionTime02`. - + Let's start by creating the second structured property. 
-``` +```shell curl -X 'POST' -v \ 'http://localhost:8080/openapi/v2/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime02/propertyDefinition' \ -H 'accept: application/json' \ @@ -331,10 +508,10 @@ curl -X 'POST' -v \ ``` This command will attach one of each of the two properties to our test dataset `urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)` -Specically, this will set `io.acryl.privacy.retentionTime` as `60.0` and `io.acryl.privacy.retentionTime02` as `bar2`. +Specifically, this will set `io.acryl.privacy.retentionTime` as `60.0` and `io.acryl.privacy.retentionTime02` as `bar2`. -``` +```shell curl -X 'POST' -v \ 'http://localhost:8080/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \ -H 'accept: application/json' \ @@ -357,6 +534,128 @@ curl -X 'POST' -v \ }' | jq ``` + + + + +Let's start by creating the second structured property. + +```shell +curl -X 'POST' -v \ + 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime02/propertyDefinition' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "qualifiedName": "io.acryl.privacy.retentionTime02", + "displayName": "Retention Time 02", + "valueType": "urn:li:dataType:datahub.string", + "allowedValues": [ + { + "value": {"string": "foo2"}, + "description": "test foo2 value" + }, + { + "value": {"string": "bar2"}, + "description": "test bar2 value" + } + ], + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset" + ] +}' | jq +``` + +Example Response: + +```json +{ + "urn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02", + "propertyDefinition": { + "value": { + "allowedValues": [ + { + "value": { + "string": "foo2" + }, + "description": "test foo2 value" + }, + { + "value": { + "string": "bar2" + }, + "description": "test bar2 value" + 
} + ], + "entityTypes": [ + "urn:li:entityType:datahub.dataset" + ], + "qualifiedName": "io.acryl.privacy.retentionTime02", + "displayName": "Retention Time 02", + "cardinality": "SINGLE", + "valueType": "urn:li:dataType:datahub.string" + } + } +} +``` + +This command will attach one of each of the two properties to our test dataset `urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)` +Specifically, this will set `io.acryl.privacy.retentionTime` as `60.0` and `io.acryl.privacy.retentionTime02` as `bar2`. + + +```shell +curl -X 'POST' -v \ + 'http://localhost:8080/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "values": [ + {"double": 60.0} + ] + }, + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02", + "values": [ + {"string": "bar2"} + ] + } + ] +}' | jq +``` + +Example Response: + +```json +{ + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "structuredProperties": { + "value": { + "properties": [ + { + "values": [ + { + "double": 60 + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime" + }, + { + "values": [ + { + "string": "bar2" + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02" + } + ] + } + } +} +``` + @@ -375,9 +674,9 @@ The expected state of our test dataset include 2 structured properties. We'd like to remove the first one (`io.acryl.privacy.retentionTime`) and preserve the second property. (`io.acryl.privacy.retentionTime02`). 
- + -``` +```shell curl -X 'PATCH' -v \ 'http://localhost:8080/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \ -H 'accept: application/json' \ @@ -398,7 +697,7 @@ curl -X 'PATCH' -v \ ``` The response will show that the expected property has been removed. -``` +```json { "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", "aspects": { @@ -420,6 +719,51 @@ The response will show that the expected property has been removed. } ``` + + + +```shell +curl -X 'PATCH' -v \ + 'http://localhost:8080/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json-patch+json' \ + -d '{ + "patch": [ + { + "op": "remove", + "path": "/properties/urn:li:structuredProperty:io.acryl.privacy.retentionTime" + } + ], + "arrayPrimaryKeys": { + "properties": [ + "propertyUrn" + ] + } + }' | jq +``` +The response will show that the expected property has been removed. + +```json +{ + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "structuredProperties": { + "value": { + "properties": [ + { + "values": [ + { + "string": "bar2" + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02" + } + ] + } + } +} +``` + + #### Expected Outcomes @@ -436,9 +780,9 @@ You can see that the first property has been removed and the second property is In this example, we'll add the property back with a different value, preserving the existing property. 
- + -``` +```shell curl -X 'PATCH' -v \ 'http://localhost:8080/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \ -H 'accept: application/json' \ @@ -466,8 +810,9 @@ curl -X 'PATCH' -v \ }' | jq ``` -Below is the expected response: -``` +Example Response: + +```json { "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", "aspects": { @@ -497,26 +842,91 @@ Below is the expected response: } ``` -The response shows that the property was re-added with the new value bar instead of the previous value foo. +The response shows that the property was re-added with the new value 365.0 instead of the previous value 60.0. - -#### Expected Outcomes -You can see that the first property has been added back with a new value and the second property is still present. + -

- -

+```shell +curl -X 'PATCH' -v \ + 'http://localhost:8080/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json-patch+json' \ + -d '{ + "patch": [ + { + "op": "add", + "path": "/properties/urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "value": { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "values": [ + { + "double": 365.0 + } + ] + } + } + ], + "arrayPrimaryKeys": { + "properties": [ + "propertyUrn" + ] + } + }' | jq +``` +Example Response: + +```json +{ + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "structuredProperties": { + "value": { + "properties": [ + { + "values": [ + { + "string": "bar2" + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02" + }, + { + "values": [ + { + "double": 365 + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime" + } + ] + } + } +} +``` + +The response shows that the property was re-added with the new value 365 instead of the previous value 60. + +
+ + + +#### Expected Outcomes +You can see that the first property has been added back with a new value and the second property is still present. + +

+ +

## Delete Structured Properties -There are two types of deletion present in DataHub: hard and soft delete. As of the current release only the soft delete is supported for Structured Properties. +There are two types of deletion present in DataHub: hard and soft delete. :::note SOFT DELETE -A soft deleted Structured Property does not remove any underlying data on the Structured Property entity or the Structured Property's values written to other entities. The soft delete is 100% reversible with zero data loss. When a Structured Property is soft deleted, a few operations are not available. +A soft deleted Structured Property does not remove any underlying data on the Structured Property entity or the Structured Property's values written to other entities. +The soft delete is 100% reversible with zero data loss. When a Structured Property is soft deleted, a few operations are not available. Structured Property Soft Delete Effects: @@ -526,6 +936,19 @@ Structured Property Soft Delete Effects: - Search filters using a soft deleted Structured Property will be denied ::: +:::note HARD DELETE +A hard deleted Structured Property REMOVES all underlying data for the Structured Property entity and the Structured Property's values written to other entities. +The hard delete is NOT reversible. + +Structured Property Hard Delete Effects: + +- Structured Property entity is removed +- Structured Property values are removed via PATCH MCPs on their respective entities +- Rollback is not possible +- Elasticsearch index mappings will continue to contain references to the hard deleted property until reindex +::: + +### Soft Delete @@ -537,11 +960,11 @@ datahub delete --urn {urn} ``` - + The following command will soft delete the test property by writing to the status aspect. 
-``` +```shell curl -X 'POST' \ 'http://localhost:8080/openapi/v2/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/status?systemMetadata=false' \ -H 'accept: application/json' \ @@ -553,7 +976,7 @@ curl -X 'POST' \ If you want to **remove the soft delete**, you can do so by either hard deleting the status aspect or changing the removed boolean to `false` like below. -``` +```shell curl -X 'POST' \ 'http://localhost:8080/openapi/v2/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/status?systemMetadata=false' \ -H 'accept: application/json' \ @@ -563,5 +986,544 @@ curl -X 'POST' \ }' | jq ``` + + + + +The following command will soft delete the test property by writing to the status aspect. + +```shell +curl -X 'POST' \ + 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/status?systemMetadata=false' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ +"removed": true +}' | jq +``` + +Example Response: + +```json +{ + "urn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "status": { + "value": { + "removed": true + } + } +} +``` + +If you want to **remove the soft delete**, you can do so by either hard deleting the status aspect or changing the removed boolean to `false` like below. + +```shell +curl -X 'POST' \ + 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/status?systemMetadata=false' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ +"removed": false +}' | jq +``` + +Example Response: + +```json +{ + "urn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "status": { + "value": { + "removed": false + } + } +} +``` + + + + + +### Hard Delete + + + + +The following command will hard delete the test property. 
+ +```commandline +datahub delete --urn {urn} --hard +``` + + + + + +The following command will hard delete the test property. + +```shell +curl -v -X 'DELETE' \ + 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime' +``` + +Example Response: + +```text +> DELETE /openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime HTTP/1.1 +> Host: localhost:8080 +> User-Agent: curl/8.4.0 +> Accept: */* +> +< HTTP/1.1 200 OK +< Date: Fri, 14 Jun 2024 17:30:27 GMT +< Content-Length: 0 +< Server: Jetty(11.0.19) +``` + + + + +#### Index Mappings Cleanup + +After the asynchronous delete of all Structured Property values has been processed, triggered by the above +hard delete, it is possible to remove the remaining index mappings. Note that if even 1 Structured Property value remains +the mapping will not be removed for a given entity index. + +Run the DataHub system-update job (automatically run with every helm upgrade or install and quickstart) with +the following environment variables enabled. + +This will trigger an ES reindex which will take time to complete. During the process the entire index is recreated. + +```shell +ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true +ENABLE_STRUCTURED_PROPERTIES_SYSTEM_UPDATE=true +``` + +## Update Structured Property With Breaking Schema Changes + +This section will demonstrate how to make backwards incompatible schema changes. Making backwards incompatible +schema changes will remove previously written data. + +Breaking schema changes are implemented by setting a version string within the Structured Property definition. This +version must be in the following format: `yyyyMMddhhmmss`, e.g. `20240614080000` + +:::IMPORTANT NOTES +Old values will not be retrievable after the new Structured Property definition is applied. + +The old values will be subject to deletion asynchronously (future work). 
+::: + +In the following example, we'll revisit the `retentionTime` structured property and apply a breaking change +by changing the cardinality from `MULTIPLE` to `SINGLE`. Normally this change would be rejected as a +backwards incompatible change since values that were previously written may have multiple values written +which would no longer be valid. + + + + +Edit the previously created definition yaml: Change the cardinality to `SINGLE` and add a `version`. + +```yaml +- id: io.acryl.privacy.retentionTime + # - urn: urn:li:structuredProperty:io.acryl.privacy.retentionTime # optional if id is provided + qualified_name: io.acryl.privacy.retentionTime # required if urn is provided + type: number + cardinality: SINGLE + version: '20240614080000' + display_name: Retention Time + entity_types: + - dataset # or urn:li:entityType:datahub.dataset + - dataFlow + description: "Retention Time is used to figure out how long to retain records in a dataset" + allowed_values: + - value: 30 + description: 30 days, usually reserved for datasets that are ephemeral and contain pii + - value: 90 + description: Use this for datasets that drive monthly reporting but contain pii + - value: 365 + description: Use this for non-sensitive data that can be retained for longer +``` + +Use the CLI to create your properties: +```commandline +datahub properties upsert -f {properties_yaml} +``` + +If successful, you should see `Created structured property urn:li:structuredProperty:...` + + + + +Change the cardinality to `SINGLE` and add a `version`. 
+ +```shell +curl -X 'POST' -v \ + 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/propertyDefinition' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "qualifiedName": "io.acryl.privacy.retentionTime", + "valueType": "urn:li:dataType:datahub.number", + "description": "Retention Time is used to figure out how long to retain records in a dataset", + "displayName": "Retention Time", + "cardinality": "SINGLE", + "version": "20240614080000", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "allowedValues": [ + { + "value": {"double": 30}, + "description": "30 days, usually reserved for datasets that are ephemeral and contain pii" + }, + { + "value": {"double": 60}, + "description": "Use this for datasets that drive monthly reporting but contain pii" + }, + { + "value": {"double": 365}, + "description": "Use this for non-sensitive data that can be retained for longer" + } + ] +}' | jq +``` + +Example Response: + +```json +{ + "urn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "propertyDefinition": { + "value": { + "allowedValues": [ + { + "description": "30 days, usually reserved for datasets that are ephemeral and contain pii", + "value": { + "double": 30 + } + }, + { + "description": "Use this for datasets that drive monthly reporting but contain pii", + "value": { + "double": 60 + } + }, + { + "description": "Use this for non-sensitive data that can be retained for longer", + "value": { + "double": 365 + } + } + ], + "displayName": "Retention Time", + "qualifiedName": "io.acryl.privacy.retentionTime", + "valueType": "urn:li:dataType:datahub.number", + "description": "Retention Time is used to figure out how long to retain records in a dataset", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "version": "20240614080000", + 
"cardinality": "SINGLE" + } + } +} +``` + + +## Structured Properties & Search + +Currently Structured Properties can be used to filter search results. This currently excludes fulltext search. + +The following examples re-use the two previously defined Structured Properties. + +`io.acryl.privacy.retentionTime` - An example numeric property. + +`io.acryl.privacy.retentionTime02` - An example string property. + + + + +Range Query: + +Document should be returned based on the previously assigned value of 60. + +```graphql +query { + scrollAcrossEntities( + input: { + types: DATASET, + count: 10, + query: "*", + orFilters: { + and: [ + { + field: "structuredProperties.io.acryl.privacy.retentionTime", + condition: GREATER_THAN, + values: [ + "45.0" + ] + } + ] + } + } + ) { + searchResults { + entity { + urn, + type + } + } + } +} +``` + +Exists Query: + +Document should be returned based on the previously assigned value. + +```graphql +query { + scrollAcrossEntities( + input: { + types: DATASET, + count: 10, + query: "*", + orFilters: { + and: [ + { + field: "structuredProperties.io.acryl.privacy.retentionTime", + condition: EXISTS + } + ] + } + } + ) { + searchResults { + entity { + urn, + type + } + } + } +} +``` + +Equality Query: + +Document should be returned based on the previously assigned value of 'bar2'. + +```graphql +query { + scrollAcrossEntities( + input: { + types: DATASET, + count: 10, + query: "*", + orFilters: { + and: [ + { + field: "structuredProperties.io.acryl.privacy.retentionTime02", + condition: EQUAL + values: [ + "bar2" + ] + } + ] + } + } + ) { + searchResults { + entity { + urn, + type + } + } + } +} +``` + + + + + +Unlike GraphQL which has a parsed input object for filtering, OpenAPI only includes a structured query which +relies on the `query_string` syntax. See the Elasticsearch [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-query-string-query.html) for detailed syntax. 
+ +In order to use the `query_string` syntax we'll need to know a bit about the Structured Property's definition such +as whether it is versioned or un-versioned and its type. This information will be added to the `query` URL parameter. + +Un-versioned Example: + +Structured Property URN - `urn:li:structuredProperty:io.acryl.privacy.retentionTime` + +Elasticsearch Field Name - `structuredProperties.io_acryl_privacy_retentionTime` + +Versioned: + +Structured Property Version - `20240614080000` + +Structured Property Type - `string` + +Structured Property URN - `urn:li:structuredProperty:io.acryl.privacy.retentionTime02` + +Elasticsearch Field Name - `structuredProperties._versioned.io_acryl_privacy_retentionTime02.20240614080000.string` + +Range Query: + +query - `structuredProperties.io_acryl_privacy_retentionTime:>45` + +```shell +curl -X 'GET' \ + 'http://localhost:9002/openapi/v3/entity/dataset?systemMetadata=false&aspects=datasetKey&aspects=structuredProperties&count=10&sort=urn&sortOrder=ASCENDING&query=structuredProperties.io_acryl_privacy_retentionTime%3A%3E45' \ + -H 'accept: application/json' +``` + +Example Response: + +```json +{ + "entities": [ + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "datasetKey": { + "value": { + "name": "SampleHiveDataset", + "platform": "urn:li:dataPlatform:hive", + "origin": "PROD" + } + }, + "structuredProperties": { + "value": { + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "values": [ + { + "double": 60 + } + ] + }, + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02", + "values": [ + { + "string": "bar2" + } + ] + } + ] + } + } + } + ] +} +``` + +Exists Query: + +query - `_exists_:structuredProperties.io_acryl_privacy_retentionTime` + +```shell +curl -X 'GET' \ + 
'http://localhost:9002/openapi/v3/entity/dataset?systemMetadata=false&aspects=datasetKey&aspects=structuredProperties&count=10&sort=urn&sortOrder=ASCENDING&query=_exists_%3AstructuredProperties.io_acryl_privacy_retentionTime' \ + -H 'accept: application/json' +``` + +Example Response: + +```json +{ + "entities": [ + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "datasetKey": { + "value": { + "name": "SampleHiveDataset", + "platform": "urn:li:dataPlatform:hive", + "origin": "PROD" + } + }, + "structuredProperties": { + "value": { + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "values": [ + { + "double": 60 + } + ] + }, + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02", + "values": [ + { + "string": "bar2" + } + ] + } + ] + } + } + } + ] +} +``` + +Equality Query: + +query - `structuredProperties._versioned.io_acryl_privacy_retentionTime02.20240614080000.string` + +```shell +curl -X 'GET' \ + 'http://localhost:9002/openapi/v3/entity/dataset?systemMetadata=false&aspects=datasetKey&aspects=structuredProperties&count=10&sort=urn&sortOrder=ASCENDING&query=structuredProperties._versioned.io_acryl_privacy_retentionTime02.20240614080000.string' \ + -H 'accept: application/json' +``` + +Example Response: + +```json +{ + "entities": [ + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "datasetKey": { + "value": { + "name": "SampleHiveDataset", + "platform": "urn:li:dataPlatform:hive", + "origin": "PROD" + } + }, + "structuredProperties": { + "value": { + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "values": [ + { + "double": 60 + } + ] + }, + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02", + "values": [ + { + "string": "bar2" + } + ] + } + ] + } + } + } + ] +} +``` + + + \ No newline at end of file diff --git 
a/docs/deploy/environment-vars.md b/docs/deploy/environment-vars.md index e2354e398ecb9..3314d2db1f467 100644 --- a/docs/deploy/environment-vars.md +++ b/docs/deploy/environment-vars.md @@ -45,23 +45,25 @@ DataHub works. ## Search -| Variable | Default | Unit/Type | Components | Description | -|-----------------------------------------------------|------------------------|-----------|-----------------------------------------------------------------|--------------------------------------------------------------------------| -| `INDEX_PREFIX` | `` | string | [`GMS`, `MAE Consumer`, `Elasticsearch Setup`, `System Update`] | Prefix Elasticsearch indices with the given string. | -| `ELASTICSEARCH_NUM_SHARDS_PER_INDEX` | 1 | integer | [`System Update`] | Default number of shards per Elasticsearch index. | -| `ELASTICSEARCH_NUM_REPLICAS_PER_INDEX` | 1 | integer | [`System Update`] | Default number of replica per Elasticsearch index. | -| `ELASTICSEARCH_BUILD_INDICES_RETENTION_VALUE` | 60 | integer | [`System Update`] | Number of units for the retention of Elasticsearch clone/backup indices. | -| `ELASTICSEARCH_BUILD_INDICES_RETENTION_UNIT` | DAYS | string | [`System Update`] | Unit for the retention of Elasticsearch clone/backup indices. | -| `ELASTICSEARCH_QUERY_EXACT_MATCH_EXCLUSIVE` | `false` | boolean | [`GMS`] | Only return exact matches when using quotes. | -| `ELASTICSEARCH_QUERY_EXACT_MATCH_WITH_PREFIX` | `true` | boolean | [`GMS`] | Include prefix match in exact match results. | -| `ELASTICSEARCH_QUERY_EXACT_MATCH_FACTOR` | 10.0 | float | [`GMS`] | Multiply by this number on true exact match. | -| `ELASTICSEARCH_QUERY_EXACT_MATCH_PREFIX_FACTOR` | 1.6 | float | [`GMS`] | Multiply by this number when prefix match. | -| `ELASTICSEARCH_QUERY_EXACT_MATCH_CASE_FACTOR` | 0.7 | float | [`GMS`] | Multiply by this number when case insensitive match. 
| -| `ELASTICSEARCH_QUERY_EXACT_MATCH_ENABLE_STRUCTURED` | `true` | boolean | [`GMS`] | When using structured query, also include exact matches. | -| `ELASTICSEARCH_QUERY_PARTIAL_URN_FACTOR` | 0.5 | float | [`GMS`] | Multiply by this number when partial token match on URN) | -| `ELASTICSEARCH_QUERY_PARTIAL_FACTOR` | 0.4 | float | [`GMS`] | Multiply by this number when partial token match on non-URN field. | -| `ELASTICSEARCH_QUERY_CUSTOM_CONFIG_ENABLED` | `false` | boolean | [`GMS`] | Enable search query and ranking customization configuration. | -| `ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE` | `search_config.yml` | string | [`GMS`] | The location of the search customization configuration. | +| Variable | Default | Unit/Type | Components | Description | +|-----------------------------------------------------|---------------------|-----------|-----------------------------------------------------------------|--------------------------------------------------------------------------| +| `INDEX_PREFIX` | `` | string | [`GMS`, `MAE Consumer`, `Elasticsearch Setup`, `System Update`] | Prefix Elasticsearch indices with the given string. | +| `ELASTICSEARCH_NUM_SHARDS_PER_INDEX` | 1 | integer | [`System Update`] | Default number of shards per Elasticsearch index. | +| `ELASTICSEARCH_NUM_REPLICAS_PER_INDEX` | 1 | integer | [`System Update`] | Default number of replica per Elasticsearch index. | +| `ELASTICSEARCH_BUILD_INDICES_RETENTION_VALUE` | 60 | integer | [`System Update`] | Number of units for the retention of Elasticsearch clone/backup indices. | +| `ELASTICSEARCH_BUILD_INDICES_RETENTION_UNIT` | DAYS | string | [`System Update`] | Unit for the retention of Elasticsearch clone/backup indices. | +| `ELASTICSEARCH_QUERY_EXACT_MATCH_EXCLUSIVE` | `false` | boolean | [`GMS`] | Only return exact matches when using quotes. | +| `ELASTICSEARCH_QUERY_EXACT_MATCH_WITH_PREFIX` | `true` | boolean | [`GMS`] | Include prefix match in exact match results. 
| +| `ELASTICSEARCH_QUERY_EXACT_MATCH_FACTOR` | 10.0 | float | [`GMS`] | Multiply by this number on true exact match. | +| `ELASTICSEARCH_QUERY_EXACT_MATCH_PREFIX_FACTOR` | 1.6 | float | [`GMS`] | Multiply by this number when prefix match. | +| `ELASTICSEARCH_QUERY_EXACT_MATCH_CASE_FACTOR` | 0.7 | float | [`GMS`] | Multiply by this number when case insensitive match. | +| `ELASTICSEARCH_QUERY_EXACT_MATCH_ENABLE_STRUCTURED` | `true` | boolean | [`GMS`] | When using structured query, also include exact matches. | +| `ELASTICSEARCH_QUERY_PARTIAL_URN_FACTOR` | 0.5 | float | [`GMS`] | Multiply by this number when partial token match on URN) | +| `ELASTICSEARCH_QUERY_PARTIAL_FACTOR` | 0.4 | float | [`GMS`] | Multiply by this number when partial token match on non-URN field. | +| `ELASTICSEARCH_QUERY_CUSTOM_CONFIG_ENABLED` | `false` | boolean | [`GMS`] | Enable search query and ranking customization configuration. | +| `ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE` | `search_config.yml` | string | [`GMS`] | The location of the search customization configuration. | +| `ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX` | `false` | boolean | [`System Update`] | Enable reindexing on Elasticsearch schema changes. | +| `ENABLE_STRUCTURED_PROPERTIES_SYSTEM_UPDATE` | `false` | boolean | [`System Update`] | Enable reindexing to remove hard deleted structured properties. 
| ## Kafka diff --git a/entity-registry/build.gradle b/entity-registry/build.gradle index 484a1f3271dbb..2dedea1f16d99 100644 --- a/entity-registry/build.gradle +++ b/entity-registry/build.gradle @@ -1,6 +1,7 @@ plugins { id 'pegasus' id 'java-library' + id 'java-test-fixtures' } dependencies { @@ -45,5 +46,8 @@ dependencies { exclude group: 'com.fasterxml.jackson.core', module: 'jackson-databind' } + testFixturesImplementation externalDependency.mockito + testFixturesCompileOnly externalDependency.lombok + testFixturesAnnotationProcessor externalDependency.lombok } compileTestJava.dependsOn tasks.getByPath(':entity-registry:custom-test-model:modelDeploy') diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/RetrieverContext.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/RetrieverContext.java index df1b9c6a6259c..629d2c0aad52d 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/RetrieverContext.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/RetrieverContext.java @@ -1,7 +1,11 @@ package com.linkedin.metadata.aspect; +import com.linkedin.metadata.entity.SearchRetriever; + public interface RetrieverContext { GraphRetriever getGraphRetriever(); AspectRetriever getAspectRetriever(); + + SearchRetriever getSearchRetriever(); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java index 031625da0477c..a302632e1936f 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java @@ -129,6 +129,16 @@ static Stream applyMCPSideEffects( .flatMap(mcpSideEffect -> mcpSideEffect.apply(items, retrieverContext)); } + default Stream applyPostMCPSideEffects(Collection items) { + return applyPostMCPSideEffects(items, getRetrieverContext()); + } + + static 
Stream applyPostMCPSideEffects( + Collection items, @Nonnull RetrieverContext retrieverContext) { + return retrieverContext.getAspectRetriever().getEntityRegistry().getAllMCPSideEffects().stream() + .flatMap(mcpSideEffect -> mcpSideEffect.postApply(items, retrieverContext)); + } + default Stream applyMCLSideEffects(Collection items) { return applyMCLSideEffects(items, getRetrieverContext()); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java index c812aea0c55d7..183b726fe0440 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java @@ -82,9 +82,34 @@ public PluginFactory loadPlugins() { this.mutationHooks = buildMutationHooks(this.pluginConfiguration); this.mclSideEffects = buildMCLSideEffects(this.pluginConfiguration); this.mcpSideEffects = buildMCPSideEffects(this.pluginConfiguration); + logSummary( + Stream.of( + this.aspectPayloadValidators, + this.mutationHooks, + this.mclSideEffects, + this.mcpSideEffects) + .flatMap(List::stream) + .collect(Collectors.toList())); return this; } + private void logSummary(List pluginSpecs) { + if (!pluginSpecs.isEmpty()) { + log.info( + "Enabled {} plugins. {}", + pluginSpecs.size(), + pluginSpecs.stream() + .map( + v -> + String.join( + ", ", + Collections.singletonList( + String.format("%s", v.getConfig().getClassName())))) + .sorted() + .collect(Collectors.toList())); + } + } + /** * Memory intensive operation because of the size of the jars. 
Limit packages, classes scanned, * cache results diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java index 845f967c0a528..52920d8c6f396 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java @@ -2,6 +2,8 @@ import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.batch.MCLItem; +import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.plugins.PluginSpec; import java.util.Collection; import java.util.function.BiFunction; @@ -28,6 +30,40 @@ public final Stream apply( retrieverContext); } + /** + * Apply MCP Side Effects after commit. + * + * @param mclItems MCL items generated by MCP commit. + * @param retrieverContext accessors for aspect and graph data + * @return additional MCPs + */ + public final Stream postApply( + Collection mclItems, @Nonnull RetrieverContext retrieverContext) { + return postMCPSideEffect( + mclItems.stream() + .filter(item -> shouldApply(item.getChangeType(), item.getUrn(), item.getAspectSpec())) + .collect(Collectors.toList()), + retrieverContext); + } + + /** + * Generate additional MCPs during the transaction of the given MCPs + * + * @param changeMCPS MCPs being committed + * @param retrieverContext accessors for aspect and graph data + * @return additional MCPs + */ protected abstract Stream applyMCPSideEffect( Collection changeMCPS, @Nonnull RetrieverContext retrieverContext); + + /** + * Generate additional MCPs after the transaction of an MCP. This task will not block the + * production of the MCL for downstream processing. 
+ * + * @param mclItems MCL items generated from committing the MCP + * @param retrieverContext accessors for aspect and graph data + * @return additional MCPs + */ + protected abstract Stream postMCPSideEffect( + Collection mclItems, @Nonnull RetrieverContext retrieverContext); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java new file mode 100644 index 0000000000000..eaa106b8d1f63 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java @@ -0,0 +1,24 @@ +package com.linkedin.metadata.entity; + +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.ScrollResult; +import java.util.List; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public interface SearchRetriever { + /** + * Returns search results for the given entities, filtered and sorted. + * + * @param entities list of entities to search + * @param filters filters to apply + * @param scrollId pagination token + * @param count size of a page + * @return result of the search + */ + ScrollResult scroll( + @Nonnull List entities, + @Nullable Filter filters, + @Nullable String scrollId, + int count); +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java b/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java index d8fb67c60469b..0ed492643980f 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java @@ -1,33 +1,44 @@ package com.linkedin.metadata.models; import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; import static 
com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD_PREFIX; import com.google.common.collect.ImmutableSet; import com.linkedin.common.Status; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.GetMode; +import com.linkedin.entity.Aspect; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.structured.PrimitivePropertyValue; import com.linkedin.structured.StructuredProperties; +import com.linkedin.structured.StructuredPropertyDefinition; import com.linkedin.structured.StructuredPropertyValueAssignment; import com.linkedin.structured.StructuredPropertyValueAssignmentArray; import com.linkedin.util.Pair; import java.sql.Date; import java.time.format.DateTimeParseException; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.Map; +import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; +@Slf4j public class StructuredPropertyUtils { private StructuredPropertyUtils() {} @@ -35,26 +46,141 @@ private StructuredPropertyUtils() {} static final Date MIN_DATE = Date.valueOf("1000-01-01"); static final Date MAX_DATE = Date.valueOf("9999-12-31"); + public static LogicalValueType getLogicalValueType( + StructuredPropertyDefinition structuredPropertyDefinition) { + return 
getLogicalValueType(structuredPropertyDefinition.getValueType()); + } + + public static LogicalValueType getLogicalValueType(@Nullable Urn valueType) { + String valueTypeId = getValueTypeId(valueType); + if ("string".equals(valueTypeId)) { + return LogicalValueType.STRING; + } else if ("date".equals(valueTypeId)) { + return LogicalValueType.DATE; + } else if ("number".equals(valueTypeId)) { + return LogicalValueType.NUMBER; + } else if ("urn".equals(valueTypeId)) { + return LogicalValueType.URN; + } else if ("rich_text".equals(valueTypeId)) { + return LogicalValueType.RICH_TEXT; + } + return LogicalValueType.UNKNOWN; + } + + @Nullable + public static String getValueTypeId(@Nullable final Urn valueType) { + if (valueType != null) { + String valueTypeId = valueType.getId(); + if (valueTypeId.startsWith("datahub.")) { + valueTypeId = valueTypeId.split("\\.")[1]; + } + return valueTypeId.toLowerCase(); + } else { + return null; + } + } + + /** + * Lookup structured property definition given the name used for the field in APIs such as a + * search filter or aggregation query facet name. 
+ * + * @param fieldOrFacetName the field name used in a filter or facet name in an aggregation query + * @param aspectRetriever method to look up the definition aspect + * @return the structured property definition if found + */ + public static Optional> + lookupDefinitionFromFilterOrFacetName( + @Nonnull String fieldOrFacetName, @Nullable AspectRetriever aspectRetriever) { + if (fieldOrFacetName.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD + ".")) { + String fqn = + fieldOrFacetName + .substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1) + .replace(".keyword", "") + .replace(".delimited", ""); + Urn urn = toURNFromFQN(fqn); + Map> result = + Objects.requireNonNull(aspectRetriever) + .getLatestAspectObjects( + Collections.singleton(urn), + Collections.singleton(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)); + Optional definition = + Optional.ofNullable( + result + .getOrDefault(urn, Collections.emptyMap()) + .getOrDefault(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, null)); + return definition.map( + definitonAspect -> + Pair.of(urn, new StructuredPropertyDefinition(definitonAspect.data()))); + } + return Optional.empty(); + } + /** - * Sanitizes fully qualified name for use in an ElasticSearch field name Replaces . and " " + * Given the structured property definition extract the Elasticsearch field name with nesting and + * character replacement. + * + *

Sanitizes fully qualified name for use in an ElasticSearch field name Replaces `.` * characters * - * @param fullyQualifiedName The original fully qualified name of the property + * @param definition The structured property definition * @return The sanitized version that can be used as a field name */ - public static String sanitizeStructuredPropertyFQN(@Nonnull String fullyQualifiedName) { - if (fullyQualifiedName.contains(" ")) { + public static String toElasticsearchFieldName( + @Nonnull Urn propertyUrn, @Nullable StructuredPropertyDefinition definition) { + String qualifiedName = definition != null ? definition.getQualifiedName() : propertyUrn.getId(); + + if (qualifiedName.contains(" ")) { throw new IllegalArgumentException( "Fully qualified structured property name cannot contain spaces"); } - return fullyQualifiedName.replace('.', '_'); + if (definition != null && definition.getVersion(GetMode.NULL) != null) { + // includes type suffix + return String.join( + ".", + STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD, + definition.getQualifiedName().replace('.', '_'), + definition.getVersion(), + getLogicalValueType(definition).name().toLowerCase()); + } else { + // un-typed property + return qualifiedName.replace('.', '_'); + } + } + + /** + * Return an elasticsearch type from structured property type + * + * @param fieldName filter or facet field name + * @param aspectRetriever aspect retriever + * @return elasticsearch type + */ + public static Set toElasticsearchFieldType( + @Nonnull String fieldName, @Nullable AspectRetriever aspectRetriever) { + LogicalValueType logicalValueType = + lookupDefinitionFromFilterOrFacetName(fieldName, aspectRetriever) + .map(definition -> getLogicalValueType(definition.getValue())) + .orElse(LogicalValueType.STRING); + + switch (logicalValueType) { + case NUMBER: + return Collections.singleton("double"); + case DATE: + return Collections.singleton("long"); + case RICH_TEXT: + return Collections.singleton("text"); + case UNKNOWN: + 
case STRING: + case URN: + default: + return Collections.singleton("keyword"); + } } public static void validateStructuredPropertyFQN( @Nonnull Collection fullyQualifiedNames, @Nonnull AspectRetriever aspectRetriever) { Set structuredPropertyUrns = fullyQualifiedNames.stream() - .map(StructuredPropertyUtils::toURNFromFieldName) + .map(StructuredPropertyUtils::toURNFromFQN) .collect(Collectors.toSet()); Set removedUrns = getRemovedUrns(structuredPropertyUrns, aspectRetriever); if (!removedUrns.isEmpty()) { @@ -63,13 +189,19 @@ public static void validateStructuredPropertyFQN( } } - public static Urn toURNFromFieldName(@Nonnull String fieldName) { + /** + * Given a Structured Property fqn, calculate the expected URN + * + * @param fqn structured property's fqn + * @return the expected structured property urn + */ + private static Urn toURNFromFQN(@Nonnull String fqn) { return UrnUtils.getUrn( - String.join(":", "urn:li", STRUCTURED_PROPERTY_ENTITY_NAME, fieldName.replace('_', '.'))); + String.join(":", "urn:li", STRUCTURED_PROPERTY_ENTITY_NAME, fqn.replace('_', '.'))); } public static void validateFilter( - @Nullable Filter filter, @Nonnull AspectRetriever aspectRetriever) { + @Nullable Filter filter, @Nullable AspectRetriever aspectRetriever) { if (filter == null) { return; @@ -80,7 +212,7 @@ public static void validateFilter( if (filter.getCriteria() != null) { for (Criterion c : filter.getCriteria()) { if (c.getField().startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) { - fieldNames.add(c.getField().substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1)); + fieldNames.add(stripStructuredPropertyPrefix(c.getField())); } } } @@ -89,15 +221,24 @@ public static void validateFilter( for (ConjunctiveCriterion cc : filter.getOr()) { for (Criterion c : cc.getAnd()) { if (c.getField().startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) { - fieldNames.add(c.getField().substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1)); + 
fieldNames.add(stripStructuredPropertyPrefix(c.getField())); } } } } if (!fieldNames.isEmpty()) { - validateStructuredPropertyFQN(fieldNames, aspectRetriever); + validateStructuredPropertyFQN(fieldNames, Objects.requireNonNull(aspectRetriever)); + } + } + + private static String stripStructuredPropertyPrefix(String s) { + if (s.startsWith(STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD_PREFIX)) { + return s.substring(STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD.length() + 1).split("[.]")[0]; + } else if (s.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) { + return s.substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1).split("[.]")[0]; } + return s; } public static Date toDate(PrimitivePropertyValue value) throws DateTimeParseException { @@ -120,7 +261,7 @@ public static boolean isValidDate(PrimitivePropertyValue value) { return date.compareTo(MIN_DATE) >= 0 && date.compareTo(MAX_DATE) <= 0; } - private static Set getRemovedUrns(Set urns, AspectRetriever aspectRetriever) { + private static Set getRemovedUrns(Set urns, @Nonnull AspectRetriever aspectRetriever) { return aspectRetriever .getLatestAspectObjects(urns, ImmutableSet.of(STATUS_ASPECT_NAME)) .entrySet() diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java index 8e877d1d23aad..1137c7c55880f 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java @@ -7,6 +7,8 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.batch.MCLItem; +import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import 
com.linkedin.metadata.models.registry.ConfigEntityRegistry; import java.util.Collection; @@ -72,5 +74,11 @@ protected Stream applyMCPSideEffect( Collection changeMCPS, @Nonnull RetrieverContext retrieverContext) { return changeMCPS.stream(); } + + @Override + protected Stream postMCPSideEffect( + Collection mclItems, @Nonnull RetrieverContext retrieverContext) { + return Stream.of(); + } } } diff --git a/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java similarity index 96% rename from entity-registry/src/test/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java rename to entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java index 9c7cd997a9af0..62e22efa9da16 100644 --- a/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java +++ b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java @@ -1,7 +1,5 @@ package com.linkedin.test.metadata.aspect; -import static org.mockito.Mockito.mock; - import com.linkedin.common.Status; import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; @@ -17,6 +15,7 @@ import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import org.mockito.Mockito; public class MockAspectRetriever implements AspectRetriever { private final Map> data; @@ -64,6 +63,6 @@ public Map> getLatestAspectObjects( @Nonnull @Override public EntityRegistry getEntityRegistry() { - return mock(EntityRegistry.class); + return Mockito.mock(EntityRegistry.class); } } diff --git a/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/TestEntityRegistry.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/TestEntityRegistry.java similarity index 100% rename from 
entity-registry/src/test/java/com/linkedin/test/metadata/aspect/TestEntityRegistry.java rename to entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/TestEntityRegistry.java diff --git a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCL.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCL.java new file mode 100644 index 0000000000000..7dd889c48b874 --- /dev/null +++ b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCL.java @@ -0,0 +1,32 @@ +package com.linkedin.test.metadata.aspect.batch; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.batch.MCLItem; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.mxe.MetadataChangeLog; +import javax.annotation.Nonnull; +import lombok.Builder; +import lombok.Getter; + +@Builder(toBuilder = true) +@Getter +public class TestMCL implements MCLItem { + private Urn urn; + private ChangeType changeType; + private MetadataChangeLog metadataChangeLog; + private RecordTemplate previousRecordTemplate; + private RecordTemplate recordTemplate; + private EntitySpec entitySpec; + private AspectSpec aspectSpec; + private AuditStamp auditStamp; + + @Nonnull + @Override + public String getAspectName() { + return getAspectSpec().getName(); + } +} diff --git a/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java similarity index 100% rename from entity-registry/src/test/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java rename to entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java diff --git 
a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index 752bf44cf4354..9a7b8287e2c6a 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -21,6 +21,10 @@ public class Constants { public static final String STRUCTURED_PROPERTY_MAPPING_FIELD = "structuredProperties"; public static final String STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX = STRUCTURED_PROPERTY_MAPPING_FIELD + "."; + public static final String STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD = "_versioned"; + public static final String STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD_PREFIX = + String.join( + ".", STRUCTURED_PROPERTY_MAPPING_FIELD, STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD, ""); // !!!!!!! IMPORTANT !!!!!!! // This effectively sets the max aspect size to 16 MB. Used in deserialization of messages. @@ -342,6 +346,7 @@ public class Constants { // Structured Property public static final String STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME = "propertyDefinition"; + public static final String STRUCTURED_PROPERTY_KEY_ASPECT_NAME = "structuredPropertyKey"; // Form public static final String FORM_INFO_ASPECT_NAME = "formInfo"; diff --git a/metadata-ingestion/scripts/modeldocgen.py b/metadata-ingestion/scripts/modeldocgen.py index ef6ce765c23ed..ea7813f0ca85b 100644 --- a/metadata-ingestion/scripts/modeldocgen.py +++ b/metadata-ingestion/scripts/modeldocgen.py @@ -504,6 +504,7 @@ class AspectPluginConfig: className: str enabled: bool supportedEntityAspectNames: List[EntityAspectName] + packageScan: Optional[List[str]] = None supportedOperations: Optional[List[str]] = None diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index 5bd73c844b380..6666e33544688 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -92,6 +92,8 @@ dependencies { testImplementation 'ch.qos.logback:logback-classic:1.4.7' testImplementation 
'net.datafaker:datafaker:1.9.0' + testImplementation(testFixtures(project(":entity-registry"))) + testAnnotationProcessor externalDependency.lombok constraints { diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java index f4473c8db3148..43a7d00248a22 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java @@ -10,6 +10,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; +import com.linkedin.data.ByteString; import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.AspectRetriever; @@ -21,7 +22,9 @@ import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.utils.EntityKeyUtils; +import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.metadata.utils.SystemMetadataUtils; +import com.linkedin.mxe.GenericAspect; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.SystemMetadata; import jakarta.json.Json; @@ -78,6 +81,29 @@ public RecordTemplate getRecordTemplate() { return null; } + @Nonnull + public MetadataChangeProposal getMetadataChangeProposal() { + if (metadataChangeProposal != null) { + return metadataChangeProposal; + } else { + GenericAspect genericAspect = new GenericAspect(); + genericAspect.setContentType("application/json"); + genericAspect.setValue(ByteString.copyString(getPatch().toString(), StandardCharsets.UTF_8)); + + final MetadataChangeProposal mcp = new MetadataChangeProposal(); + mcp.setEntityUrn(getUrn()); + mcp.setChangeType(getChangeType()); + 
mcp.setEntityType(getEntitySpec().getName()); + mcp.setAspectName(getAspectName()); + mcp.setAspect(genericAspect); + mcp.setSystemMetadata(getSystemMetadata()); + mcp.setEntityKeyAspect( + GenericRecordUtils.serializeAspect( + EntityKeyUtils.convertUrnToEntityKey(getUrn(), entitySpec.getKeyAspectSpec()))); + return mcp; + } + } + public ChangeItemImpl applyPatch(RecordTemplate recordTemplate, AspectRetriever aspectRetriever) { ChangeItemImpl.ChangeItemImplBuilder builder = ChangeItemImpl.builder() diff --git a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java index 21bac3cbb0e61..6f0cd51af0793 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java @@ -132,7 +132,7 @@ public static List getAdditionalChanges( getProposalFromAspectForDefault( entry.getKey(), entry.getValue(), entityKeyAspect, templateItem), templateItem.getAuditStamp(), - opContext.getRetrieverContext().get().getAspectRetriever())) + opContext.getAspectRetrieverOpt().get())) .filter(Objects::nonNull); }) .collect(Collectors.toList()); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 80f976f9ae81e..34c836d760a7d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -44,6 +44,7 @@ import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.batch.MCLItem; import com.linkedin.metadata.aspect.batch.MCPItem; import 
com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil; @@ -53,6 +54,7 @@ import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.entity.ebean.batch.DeleteItemImpl; +import com.linkedin.metadata.entity.ebean.batch.MCLItemImpl; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult; import com.linkedin.metadata.entity.retention.BulkApplyRetentionArgs; @@ -100,6 +102,7 @@ import java.util.function.Consumer; import java.util.stream.Collectors; import java.util.stream.Stream; +import java.util.stream.StreamSupport; import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.persistence.EntityNotFoundException; @@ -152,6 +155,7 @@ public class EntityServiceImpl implements EntityService { @Nullable @Getter private SearchIndicesService updateIndicesService; private final PreProcessHooks preProcessHooks; protected static final int MAX_KEYS_PER_QUERY = 500; + protected static final int MCP_SIDE_EFFECT_KAFKA_BATCH_SIZE = 500; private final Integer ebeanMaxTransactionRetry; private final boolean enableBrowseV2; @@ -659,7 +663,7 @@ public List ingestAspects( .recordTemplate(pair.getValue()) .systemMetadata(systemMetadata) .auditStamp(auditStamp) - .build(opContext.getRetrieverContext().get().getAspectRetriever())) + .build(opContext.getAspectRetrieverOpt().get())) .collect(Collectors.toList()); return ingestAspects( opContext, @@ -696,9 +700,50 @@ public List ingestAspects( ingestAspectsToLocalDB(opContext, aspectsBatch, overwrite); List mclResults = emitMCL(opContext, ingestResults, emitMCL); + + processPostCommitMCLSideEffects( + opContext, + mclResults.stream() + .filter(result -> !result.isNoOp()) + .map(UpdateAspectResult::toMCL) + .collect(Collectors.toList())); + return 
mclResults; } + /** + * Process post-commit MCPSideEffects + * + * @param mcls mcls generated + */ + private void processPostCommitMCLSideEffects( + @Nonnull OperationContext opContext, List mcls) { + log.debug("Considering {} MCLs post commit side effects.", mcls.size()); + List batch = + mcls.stream() + .map(mcl -> MCLItemImpl.builder().build(mcl, opContext.getAspectRetrieverOpt().get())) + .collect(Collectors.toList()); + + Iterable> iterable = + () -> + Iterators.partition( + AspectsBatch.applyPostMCPSideEffects(batch, opContext.getRetrieverContext().get()) + .iterator(), + MCP_SIDE_EFFECT_KAFKA_BATCH_SIZE); + StreamSupport.stream(iterable.spliterator(), false) + .forEach( + sideEffects -> { + long count = + ingestProposalAsync( + AspectsBatchImpl.builder() + .items(sideEffects) + .retrieverContext(opContext.getRetrieverContext().get()) + .build()) + .count(); + log.info("Generated {} MCP SideEffects for async processing", count); + }); + } + /** * Checks whether there is an actual update to the aspect by applying the updateLambda If there is * an update, push the new version into the local DB. 
Otherwise, do not push the new version, but @@ -984,7 +1029,7 @@ public RecordTemplate ingestAspectIfNotPresent( .recordTemplate(newValue) .systemMetadata(systemMetadata) .auditStamp(auditStamp) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), opContext.getRetrieverContext().get()) .build(); List ingested = ingestAspects(opContext, aspectsBatch, true, false); @@ -1082,7 +1127,7 @@ private Stream ingestTimeseriesProposal( .recordTemplate( EntityApiUtils.buildKeyAspect( opContext.getEntityRegistry(), item.getUrn())) - .build(opContext.getRetrieverContext().get().getAspectRetriever())) + .build(opContext.getAspectRetrieverOpt().get())) .collect(Collectors.toList()); ingestProposalSync( @@ -1476,7 +1521,7 @@ private RestoreIndicesResult restoreIndices( .auditStamp(auditStamp) .systemMetadata(latestSystemMetadata) .recordTemplate(EntityApiUtils.buildKeyAspect(opContext.getEntityRegistry(), urn)) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); Stream defaultAspectsResult = ingestProposalSync( opContext, @@ -1807,7 +1852,7 @@ private void ingestSnapshotUnion( .recordTemplate(pair.getValue()) .auditStamp(auditStamp) .systemMetadata(systemMetadata) - .build(opContext.getRetrieverContext().get().getAspectRetriever())) + .build(opContext.getAspectRetrieverOpt().get())) .collect(Collectors.toList())) .build(); @@ -1871,6 +1916,7 @@ public RollbackRunResult rollbackWithConditions( Map conditions, boolean hardDelete) { List removedAspects = new ArrayList<>(); + List removedAspectResults = new ArrayList<>(); AtomicInteger rowsDeletedFromEntityDeletion = new AtomicInteger(0); List> futures = @@ -1878,7 +1924,7 @@ public RollbackRunResult rollbackWithConditions( .map( aspectToRemove -> { RollbackResult result = - deleteAspect( + deleteAspectWithoutMCL( opContext, aspectToRemove.getUrn(), aspectToRemove.getAspectName(), @@ -1899,6 +1945,7 
@@ public RollbackRunResult rollbackWithConditions( rowsDeletedFromEntityDeletion.addAndGet(result.additionalRowsAffected); removedAspects.add(aspectToRemove); + removedAspectResults.add(result); return alwaysProduceMCLAsync( opContext, result.getUrn(), @@ -1929,12 +1976,14 @@ public RollbackRunResult rollbackWithConditions( } }); - return new RollbackRunResult(removedAspects, rowsDeletedFromEntityDeletion.get()); + return new RollbackRunResult( + removedAspects, rowsDeletedFromEntityDeletion.get(), removedAspectResults); } @Override public RollbackRunResult deleteUrn(@Nonnull OperationContext opContext, Urn urn) { List removedAspects = new ArrayList<>(); + List removedAspectResults = new ArrayList<>(); Integer rowsDeletedFromEntityDeletion = 0; final EntitySpec spec = @@ -1949,7 +1998,8 @@ public RollbackRunResult deleteUrn(@Nonnull OperationContext opContext, Urn urn) log.warn("Entity to delete does not exist. {}", urn.toString()); } if (latestKey == null || latestKey.getSystemMetadata() == null) { - return new RollbackRunResult(removedAspects, rowsDeletedFromEntityDeletion); + return new RollbackRunResult( + removedAspects, rowsDeletedFromEntityDeletion, removedAspectResults); } SystemMetadata latestKeySystemMetadata = @@ -1957,7 +2007,7 @@ public RollbackRunResult deleteUrn(@Nonnull OperationContext opContext, Urn urn) .map(SystemAspect::getSystemMetadata) .get(); RollbackResult result = - deleteAspect( + deleteAspectWithoutMCL( opContext, urn.toString(), keyAspectName, @@ -1974,6 +2024,7 @@ public RollbackRunResult deleteUrn(@Nonnull OperationContext opContext, Urn urn) rowsDeletedFromEntityDeletion = result.additionalRowsAffected; removedAspects.add(summary); + removedAspectResults.add(result); Future future = alwaysProduceMCLAsync( opContext, @@ -1999,7 +2050,8 @@ public RollbackRunResult deleteUrn(@Nonnull OperationContext opContext, Urn urn) } } - return new RollbackRunResult(removedAspects, rowsDeletedFromEntityDeletion); + return new RollbackRunResult( + 
removedAspects, rowsDeletedFromEntityDeletion, removedAspectResults); } @Override @@ -2052,9 +2104,9 @@ public Set exists( } } + /** Does not emit MCL */ @Nullable - @Override - public RollbackResult deleteAspect( + private RollbackResult deleteAspectWithoutMCL( @Nonnull OperationContext opContext, String urn, String aspectName, @@ -2074,7 +2126,7 @@ public RollbackResult deleteAspect( .urn(entityUrn) .aspectName(aspectName) .auditStamp(auditStamp) - .build(opContext.getRetrieverContext().get().getAspectRetriever()); + .build(opContext.getAspectRetrieverOpt().get()); // Delete validation hooks ValidationExceptionCollection exceptions = @@ -2242,6 +2294,10 @@ public RollbackResult deleteAspect( }, DEFAULT_MAX_TRANSACTION_RETRY); + if (result != null) { + processPostCommitMCLSideEffects(opContext, List.of(result.toMCL(auditStamp))); + } + return result; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java index fe9bcb0daba91..27b603244d3b3 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java @@ -765,9 +765,6 @@ private void removeIncomingEdgesFromNode( _dgraph.executeConsumer(client -> client.newTransaction().doRequest(request)); } - @Override - public void configure() {} - @Override public void clear() { log.debug("dropping Dgraph data"); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java index 37cc6ab6b1eeb..ada5069d0cabe 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java @@ -35,6 +35,7 @@ import 
com.linkedin.metadata.shared.ElasticSearchIndexed; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import io.opentelemetry.extension.annotations.WithSpan; import java.io.IOException; import java.util.ArrayList; @@ -274,10 +275,10 @@ public void removeEdgesFromNode( } @Override - public void configure() { + public void reindexAll(Collection> properties) { log.info("Setting up elastic graph index"); try { - for (ReindexConfig config : buildReindexConfigs()) { + for (ReindexConfig config : buildReindexConfigs(properties)) { _indexBuilder.buildIndex(config); } } catch (IOException e) { @@ -286,7 +287,8 @@ public void configure() { } @Override - public List buildReindexConfigs() throws IOException { + public List buildReindexConfigs( + Collection> properties) throws IOException { return List.of( _indexBuilder.buildReindexState( _indexConvention.getIndexName(INDEX_NAME), @@ -294,17 +296,6 @@ public List buildReindexConfigs() throws IOException { Collections.emptyMap())); } - @Override - public List buildReindexConfigsWithAllStructProps( - Collection properties) throws IOException { - return buildReindexConfigs(); - } - - @Override - public void reindexAll() { - configure(); - } - @VisibleForTesting @Override public void clear() { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java index 513672b071c17..16c0804538dd7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java @@ -698,11 +698,6 @@ public void removeNodesMatchingLabel(@Nonnull String labelPattern) { runQuery(buildStatement(statement, params)).consume(); } - @Override - public void configure() { - // Do nothing - } - @Override public void clear() { 
removeNodesMatchingLabel(".*"); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java index 399b0aa6e49a6..fea3fafdc845a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java @@ -146,7 +146,9 @@ private SearchRequest buildSearchRequest(@Nonnull OperationContext opContext) { // Find the entities with the most views AggregationBuilder aggregation = AggregationBuilders.terms(ENTITY_AGG_NAME) - .field(ESUtils.toKeywordField(DataHubUsageEventConstants.ENTITY_URN, false)) + .field( + ESUtils.toKeywordField( + DataHubUsageEventConstants.ENTITY_URN, false, opContext.getAspectRetriever())) .size(MAX_CONTENT * 2); source.aggregation(aggregation); source.size(0); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyEditedSource.java b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyEditedSource.java index d75470127ded8..afdce0d714513 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyEditedSource.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyEditedSource.java @@ -5,6 +5,7 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.datahubusage.DataHubUsageEventConstants; import com.linkedin.metadata.datahubusage.DataHubUsageEventType; import com.linkedin.metadata.entity.EntityService; @@ -104,7 +105,8 @@ public List getRecommendations( @Nonnull RecommendationRequestContext requestContext, @Nullable Filter filter) { 
SearchRequest searchRequest = - buildSearchRequest(opContext.getSessionActorContext().getActorUrn()); + buildSearchRequest( + opContext.getSessionActorContext().getActorUrn(), opContext.getAspectRetriever()); try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "getRecentlyEdited").time()) { final SearchResponse searchResponse = _searchClient.search(searchRequest, RequestOptions.DEFAULT); @@ -128,7 +130,8 @@ public Set getSupportedEntityTypes() { return SUPPORTED_ENTITY_TYPES; } - private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) { + private SearchRequest buildSearchRequest( + @Nonnull Urn userUrn, @Nullable AspectRetriever aspectRetriever) { // TODO: Proactively filter for entity types in the supported set. SearchRequest request = new SearchRequest(); SearchSourceBuilder source = new SearchSourceBuilder(); @@ -136,7 +139,7 @@ private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) { // Filter for the entity edit events of the user requesting recommendation query.must( QueryBuilders.termQuery( - ESUtils.toKeywordField(DataHubUsageEventConstants.ACTOR_URN, false), + ESUtils.toKeywordField(DataHubUsageEventConstants.ACTOR_URN, false, aspectRetriever), userUrn.toString())); // Filter for the entity action events query.must( @@ -148,7 +151,9 @@ private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) { String lastViewed = "last_viewed"; AggregationBuilder aggregation = AggregationBuilders.terms(ENTITY_AGG_NAME) - .field(ESUtils.toKeywordField(DataHubUsageEventConstants.ENTITY_URN, false)) + .field( + ESUtils.toKeywordField( + DataHubUsageEventConstants.ENTITY_URN, false, aspectRetriever)) .size(MAX_CONTENT) .order(BucketOrder.aggregation(lastViewed, false)) .subAggregation( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java index e9613495e8d22..f282470193ae5 
100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java @@ -5,6 +5,7 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.datahubusage.DataHubUsageEventConstants; import com.linkedin.metadata.datahubusage.DataHubUsageEventType; import com.linkedin.metadata.entity.EntityService; @@ -104,7 +105,8 @@ public List getRecommendations( @Nonnull RecommendationRequestContext requestContext, @Nullable Filter filter) { SearchRequest searchRequest = - buildSearchRequest(opContext.getSessionActorContext().getActorUrn()); + buildSearchRequest( + opContext.getSessionActorContext().getActorUrn(), opContext.getAspectRetriever()); try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "getRecentlyViewed").time()) { final SearchResponse searchResponse = _searchClient.search(searchRequest, RequestOptions.DEFAULT); @@ -128,7 +130,8 @@ public Set getSupportedEntityTypes() { return SUPPORTED_ENTITY_TYPES; } - private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) { + private SearchRequest buildSearchRequest( + @Nonnull Urn userUrn, @Nullable AspectRetriever aspectRetriever) { // TODO: Proactively filter for entity types in the supported set. 
SearchRequest request = new SearchRequest(); SearchSourceBuilder source = new SearchSourceBuilder(); @@ -136,7 +139,7 @@ private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) { // Filter for the entity view events of the user requesting recommendation query.must( QueryBuilders.termQuery( - ESUtils.toKeywordField(DataHubUsageEventConstants.ACTOR_URN, false), + ESUtils.toKeywordField(DataHubUsageEventConstants.ACTOR_URN, false, aspectRetriever), userUrn.toString())); query.must( QueryBuilders.termQuery( @@ -147,7 +150,9 @@ private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) { String lastViewed = "last_viewed"; AggregationBuilder aggregation = AggregationBuilders.terms(ENTITY_AGG_NAME) - .field(ESUtils.toKeywordField(DataHubUsageEventConstants.ENTITY_URN, false)) + .field( + ESUtils.toKeywordField( + DataHubUsageEventConstants.ENTITY_URN, false, aspectRetriever)) .size(MAX_CONTENT) .order(BucketOrder.aggregation(lastViewed, false)) .subAggregation( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java new file mode 100644 index 0000000000000..a5ef1c8fa58b1 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java @@ -0,0 +1,51 @@ +package com.linkedin.metadata.search; + +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.query.SearchFlags; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Getter; +import lombok.Setter; + +@Getter +@Builder +public class SearchServiceSearchRetriever implements SearchRetriever { + private static 
final SearchFlags RETRIEVER_SEARCH_FLAGS = + new SearchFlags() + .setFulltext(false) + .setMaxAggValues(20) + .setSkipCache(false) + .setSkipAggregates(true) + .setSkipHighlighting(true) + .setIncludeSoftDeleted(false) + .setIncludeRestricted(false); + + @Setter private OperationContext systemOperationContext; + private final SearchService searchService; + + @Override + public ScrollResult scroll( + @Nonnull List entities, + @Nullable Filter filters, + @Nullable String scrollId, + int count) { + SortCriterion urnSort = new SortCriterion(); + urnSort.setField("urn"); + urnSort.setOrder(SortOrder.ASCENDING); + return searchService.scrollAcrossEntities( + systemOperationContext.withSearchFlags(flags -> RETRIEVER_SEARCH_FLAGS), + entities, + "*", + filters, + urnSort, + scrollId, + null, + count); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java index 19cd1f767f472..578c34611a75a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java @@ -21,6 +21,7 @@ import com.linkedin.metadata.search.utils.SearchUtils; import com.linkedin.metadata.shared.ElasticSearchIndexed; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; import java.io.IOException; import java.util.Collection; @@ -55,24 +56,14 @@ public class ElasticSearchService implements EntitySearchService, ElasticSearchI private final ESWriteDAO esWriteDAO; @Override - public void configure() { - indexBuilders.reindexAll(); + public void reindexAll(Collection> properties) { + indexBuilders.reindexAll(properties); } @Override - public List buildReindexConfigs() { - return indexBuilders.buildReindexConfigs(); - } 
- - @Override - public List buildReindexConfigsWithAllStructProps( - Collection properties) throws IOException { - return indexBuilders.buildReindexConfigsWithAllStructProps(properties); - } - - @Override - public void reindexAll() { - configure(); + public List buildReindexConfigs( + Collection> properties) throws IOException { + return indexBuilders.buildReindexConfigs(properties); } @Override diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java index cc6a0f3e3d6f9..2d04e99774050 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java @@ -81,6 +81,8 @@ public class ESIndexBuilder { @Getter private final boolean enableIndexMappingsReindex; + @Getter private final boolean enableStructuredPropertiesReindex; + @Getter private final ElasticSearchConfiguration elasticSearchConfiguration; @Getter private final GitVersion gitVersion; @@ -101,6 +103,7 @@ public ESIndexBuilder( Map> indexSettingOverrides, boolean enableIndexSettingsReindex, boolean enableIndexMappingsReindex, + boolean enableStructuredPropertiesReindex, ElasticSearchConfiguration elasticSearchConfiguration, GitVersion gitVersion) { this._searchClient = searchClient; @@ -112,6 +115,7 @@ public ESIndexBuilder( this.enableIndexSettingsReindex = enableIndexSettingsReindex; this.enableIndexMappingsReindex = enableIndexMappingsReindex; this.elasticSearchConfiguration = elasticSearchConfiguration; + this.enableStructuredPropertiesReindex = enableStructuredPropertiesReindex; this.gitVersion = gitVersion; RetryConfig config = @@ -143,6 +147,8 @@ public ReindexConfig buildReindexState( .name(indexName) .enableIndexSettingsReindex(enableIndexSettingsReindex) 
.enableIndexMappingsReindex(enableIndexMappingsReindex) + .enableStructuredPropertiesReindex( + enableStructuredPropertiesReindex && !copyStructuredPropertyMappings) .version(gitVersion.getVersion()); Map baseSettings = new HashMap<>(settings); @@ -293,7 +299,7 @@ public void buildIndex(ReindexConfig indexState) throws IOException { * @throws IOException communication issues with ES */ public void applyMappings(ReindexConfig indexState, boolean suppressError) throws IOException { - if (indexState.isPureMappingsAddition() || indexState.isPureStructuredProperty()) { + if (indexState.isPureMappingsAddition() || indexState.isPureStructuredPropertyAddition()) { log.info("Updating index {} mappings in place.", indexState.name()); PutMappingRequest request = new PutMappingRequest(indexState.name()).source(indexState.targetMappings()); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilders.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilders.java index afc831b004ec3..eba4593c9042c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilders.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilders.java @@ -1,9 +1,11 @@ package com.linkedin.metadata.search.elasticsearch.indexbuilder; +import com.linkedin.common.urn.Urn; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.shared.ElasticSearchIndexed; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.io.IOException; import java.util.Collection; import java.util.List; @@ -26,8 +28,8 @@ public ESIndexBuilder getIndexBuilder() { } @Override - public void reindexAll() { - for (ReindexConfig config : buildReindexConfigs()) { + public void 
reindexAll(Collection> properties) { + for (ReindexConfig config : buildReindexConfigs(properties)) { try { indexBuilder.buildIndex(config); } catch (IOException e) { @@ -37,26 +39,8 @@ public void reindexAll() { } @Override - public List buildReindexConfigs() { - Map settings = settingsBuilder.getSettings(); - MappingsBuilder.setEntityRegistry(entityRegistry); - return entityRegistry.getEntitySpecs().values().stream() - .map( - entitySpec -> { - try { - Map mappings = MappingsBuilder.getMappings(entitySpec); - return indexBuilder.buildReindexState( - indexConvention.getIndexName(entitySpec), mappings, settings, true); - } catch (IOException e) { - throw new RuntimeException(e); - } - }) - .collect(Collectors.toList()); - } - - @Override - public List buildReindexConfigsWithAllStructProps( - Collection properties) { + public List buildReindexConfigs( + Collection> properties) { Map settings = settingsBuilder.getSettings(); MappingsBuilder.setEntityRegistry(entityRegistry); return entityRegistry.getEntitySpecs().values().stream() @@ -81,7 +65,7 @@ public List buildReindexConfigsWithAllStructProps( * @return index configurations impacted by the new property */ public List buildReindexConfigsWithNewStructProp( - StructuredPropertyDefinition property) { + Urn urn, StructuredPropertyDefinition property) { Map settings = settingsBuilder.getSettings(); MappingsBuilder.setEntityRegistry(entityRegistry); return entityRegistry.getEntitySpecs().values().stream() @@ -89,7 +73,7 @@ public List buildReindexConfigsWithNewStructProp( entitySpec -> { try { Map mappings = - MappingsBuilder.getMappings(entitySpec, List.of(property)); + MappingsBuilder.getMappings(entitySpec, List.of(Pair.of(urn, property))); return indexBuilder.buildReindexState( indexConvention.getIndexName(entitySpec), mappings, settings, true); } catch (IOException e) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java 
b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java index f8d0f165bcddf..5dc28a8fd598d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java @@ -2,7 +2,7 @@ import static com.linkedin.metadata.Constants.ENTITY_TYPE_URN_PREFIX; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD; -import static com.linkedin.metadata.models.StructuredPropertyUtils.sanitizeStructuredPropertyFQN; +import static com.linkedin.metadata.models.StructuredPropertyUtils.toElasticsearchFieldName; import static com.linkedin.metadata.models.annotation.SearchableAnnotation.OBJECT_FIELD_TYPES; import static com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder.*; @@ -17,11 +17,13 @@ import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.utils.ESUtils; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.net.URISyntaxException; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; @@ -52,6 +54,15 @@ public static Map getPartialNgramConfigWithOverrides( public static final String WORD_GRAMS_LENGTH_2 = "wordGrams2"; public static final String WORD_GRAMS_LENGTH_3 = "wordGrams3"; public static final String WORD_GRAMS_LENGTH_4 = "wordGrams4"; + public static final Set SUBFIELDS = + Set.of( + KEYWORD, + DELIMITED, + LENGTH, + NGRAM, + WORD_GRAMS_LENGTH_2, + WORD_GRAMS_LENGTH_3, + WORD_GRAMS_LENGTH_4); // Alias field mappings constants public static final String ALIAS = "alias"; @@ -72,7 +83,7 @@ private MappingsBuilder() {} */ public static Map getMappings( @Nonnull final 
EntitySpec entitySpec, - Collection structuredProperties) { + Collection> structuredProperties) { Map mappings = getMappings(entitySpec); String entityName = entitySpec.getEntityAnnotation().getName(); @@ -80,9 +91,11 @@ public static Map getMappings( getMappingsForStructuredProperty( structuredProperties.stream() .filter( - prop -> { + urnProp -> { try { - return prop.getEntityTypes() + return urnProp + .getSecond() + .getEntityTypes() .contains(Urn.createFromString(ENTITY_TYPE_URN_PREFIX + entityName)); } catch (URISyntaxException e) { return false; @@ -165,10 +178,11 @@ private static Map getMappingsForSystemCreated() { } public static Map getMappingsForStructuredProperty( - Collection properties) { + Collection> properties) { return properties.stream() .map( - property -> { + urnProperty -> { + StructuredPropertyDefinition property = urnProperty.getSecond(); Map mappingForField = new HashMap<>(); String valueType = property.getValueType().getId(); if (valueType.equalsIgnoreCase(LogicalValueType.STRING.name())) { @@ -183,7 +197,7 @@ public static Map getMappingsForStructuredProperty( mappingForField.put(TYPE, ESUtils.DOUBLE_FIELD_TYPE); } return Map.entry( - sanitizeStructuredPropertyFQN(property.getQualifiedName()), mappingForField); + toElasticsearchFieldName(urnProperty.getFirst(), property), mappingForField); }) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ReindexConfig.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ReindexConfig.java index fbb7fcadba8bc..fd8db19ea6a7b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ReindexConfig.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ReindexConfig.java @@ -11,6 +11,9 @@ import com.google.common.collect.MapDifference; import com.google.common.collect.Maps; import 
com.linkedin.metadata.search.utils.ESUtils; +import com.linkedin.util.Pair; +import java.util.AbstractMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; @@ -61,6 +64,7 @@ public class ReindexConfig { private final Map targetMappings; private final boolean enableIndexMappingsReindex; private final boolean enableIndexSettingsReindex; + private final boolean enableStructuredPropertiesReindex; private final String version; /* Calculated */ @@ -70,7 +74,8 @@ public class ReindexConfig { private final boolean isPureMappingsAddition; private final boolean isSettingsReindex; private final boolean hasNewStructuredProperty; - private final boolean isPureStructuredProperty; + private final boolean isPureStructuredPropertyAddition; + private final boolean hasRemovedStructuredProperty; public static ReindexConfigBuilder builder() { return new CalculatedBuilder(); @@ -102,7 +107,11 @@ private ReindexConfigBuilder hasNewStructuredProperty(boolean ignored) { return this; } - private ReindexConfigBuilder isPureStructuredProperty(boolean ignored) { + private ReindexConfigBuilder isPureStructuredPropertyAddition(boolean ignored) { + return this; + } + + private ReindexConfigBuilder hasRemovedStructuredProperty(boolean ignored) { return this; } @@ -156,7 +165,7 @@ public ReindexConfig build() { super.requiresApplyMappings = !mappingsDiff.entriesDiffering().isEmpty() || !mappingsDiff.entriesOnlyOnRight().isEmpty(); - super.isPureStructuredProperty = + super.isPureStructuredPropertyAddition = mappingsDiff .entriesDiffering() .keySet() @@ -169,6 +178,22 @@ public ReindexConfig build() { super.requiresApplyMappings && mappingsDiff.entriesDiffering().isEmpty() && !mappingsDiff.entriesOnlyOnRight().isEmpty(); + super.hasNewStructuredProperty = + (mappingsDiff.entriesDiffering().containsKey(STRUCTURED_PROPERTY_MAPPING_FIELD) + || mappingsDiff + .entriesOnlyOnRight() + .containsKey(STRUCTURED_PROPERTY_MAPPING_FIELD)) + && 
structuredPropertiesDiffCount(super.currentMappings, super.targetMappings) + .getSecond() + > 0; + super.hasRemovedStructuredProperty = + (mappingsDiff.entriesDiffering().containsKey(STRUCTURED_PROPERTY_MAPPING_FIELD) + || mappingsDiff + .entriesOnlyOnLeft() + .containsKey(STRUCTURED_PROPERTY_MAPPING_FIELD)) + && structuredPropertiesDiffCount(super.currentMappings, super.targetMappings) + .getFirst() + > 0; if (super.requiresApplyMappings && super.isPureMappingsAddition) { log.info( @@ -181,19 +206,6 @@ public ReindexConfig build() { super.name, mappingsDiff.entriesDiffering()); } - super.hasNewStructuredProperty = - (mappingsDiff.entriesDiffering().containsKey(STRUCTURED_PROPERTY_MAPPING_FIELD) - || mappingsDiff - .entriesOnlyOnRight() - .containsKey(STRUCTURED_PROPERTY_MAPPING_FIELD)) - && getOrDefault( - super.currentMappings, - List.of("properties", STRUCTURED_PROPERTY_MAPPING_FIELD, "properties")) - .size() - < getOrDefault( - super.targetMappings, - List.of("properties", STRUCTURED_PROPERTY_MAPPING_FIELD, "properties")) - .size(); /* Consider analysis and settings changes */ super.requiresApplySettings = !isSettingsEqual() || !isAnalysisEqual(); @@ -208,7 +220,26 @@ < getOrDefault( "Index: {} - There's diff between new mappings, however reindexing is DISABLED.", super.name); } + } else if (super.hasRemovedStructuredProperty) { + if (super.enableIndexMappingsReindex + && super.enableIndexMappingsReindex + && super.enableStructuredPropertiesReindex) { + super.requiresApplyMappings = true; + super.requiresReindex = true; + } else { + if (!super.enableIndexMappingsReindex) { + log.warn( + "Index: {} - There's diff between new mappings, however reindexing is DISABLED.", + super.name); + } + if (!super.enableStructuredPropertiesReindex) { + log.warn( + "Index: {} - There's a removed Structured Property, however Structured Property reindexing is DISABLED.", + super.name); + } + } } + if (super.isSettingsReindex) { try { if (!isAnalysisEqual()) { @@ -255,6 +286,46 @@
private static TreeMap getOrDefault( } } + /** + * Return counts for removed and added structured properties based on the difference between the + * existing mapping configuration and the target configuration + * + * @return count of structured properties to be removed and added to the index mapping + */ + private static Pair structuredPropertiesDiffCount( + Map current, Map target) { + Set currentStructuredProperties = new HashSet<>(); + Set targetStructuredProperties = new HashSet<>(); + + // add non-versioned property ids + currentStructuredProperties.addAll( + getOrDefault( + current, List.of("properties", STRUCTURED_PROPERTY_MAPPING_FIELD, "properties")) + .keySet() + .stream() + .filter(k -> !STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD.equals(k)) + .collect(Collectors.toSet())); + targetStructuredProperties.addAll( + getOrDefault( + target, List.of("properties", STRUCTURED_PROPERTY_MAPPING_FIELD, "properties")) + .keySet() + .stream() + .filter(k -> !STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD.equals(k)) + .collect(Collectors.toSet())); + + // Extract versioned/typed property ids + currentStructuredProperties.addAll(getVersionedStructuredPropertyIds(current)); + targetStructuredProperties.addAll(getVersionedStructuredPropertyIds(target)); + + return Pair.of( + currentStructuredProperties.stream() + .filter(p -> !targetStructuredProperties.contains(p)) + .count(), + targetStructuredProperties.stream() + .filter(p -> !currentStructuredProperties.contains(p)) + .count()); + } + private boolean isAnalysisEqual() { if (super.targetSettings == null || !super.targetSettings.containsKey("index")) { return true; @@ -327,7 +398,7 @@ private static MapDifference calculateMapDifference( .collect(Collectors.toSet()); if (!targetObjectFields.isEmpty()) { - log.info("Object fields filtered from comparison: {}", targetObjectFields); + log.debug("Object fields filtered from comparison: {}", targetObjectFields); Map filteredCurrentMappings = removeKeys(currentMappings, 
targetObjectFields); Map filteredTargetMappings = removeKeys(targetMappings, targetObjectFields); @@ -336,6 +407,29 @@ private static MapDifference calculateMapDifference( return Maps.difference(currentMappings, targetMappings); } + + /** + * Given a mapping return a unique string for each version/typed structured property + * + * @param mappings Elastic mappings + * @return set of unique ids for each versioned/typed structured property + */ + private static Set getVersionedStructuredPropertyIds(Map mappings) { + Map versionedMappings = + getOrDefault( + mappings, + List.of( + "properties", + STRUCTURED_PROPERTY_MAPPING_FIELD, + "properties", + STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD, + "properties")); + + return flattenStructuredPropertyPath( + Map.entry(STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD, versionedMappings), 0) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + } } private static Map removeKeys( @@ -373,4 +467,22 @@ private static boolean equalsGroup(Map newSettings, Settings old } return true; } + + /** + * Return a map with dot delimited path as keys + * + * @param entry for root map + * @return dot delimited key path map + */ + private static Stream> flattenStructuredPropertyPath( + Map.Entry entry, int depth) { + if (entry.getValue() instanceof Map && depth < 5) { + Map nested = (Map) entry.getValue(); + + return nested.entrySet().stream() + .map(e -> new AbstractMap.SimpleEntry(entry.getKey() + "." 
+ e.getKey(), e.getValue())) + .flatMap(e -> flattenStructuredPropertyPath(e, depth + 1)); + } + return Stream.of(entry); + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java index 97ec9f2192e52..b55418d12c7c2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java @@ -607,11 +607,7 @@ private QueryBuilder buildQueryStringV2( EntitySpec entitySpec = opContext.getEntityRegistry().getEntitySpec(entityName); QueryBuilder query = - SearchRequestHandler.getBuilder( - entitySpec, - searchConfiguration, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()) + SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, customSearchConfiguration) .getQuery( finalOpContext, input, @@ -647,11 +643,7 @@ private QueryBuilder buildQueryStringBrowseAcrossEntities( final BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery(); QueryBuilder query = - SearchRequestHandler.getBuilder( - entitySpecs, - searchConfiguration, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()) + SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) .getQuery( finalOpContext, input, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index d8c5c3317a2ec..b537a39634027 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -115,10 +115,7 @@ private SearchResult executeAndExtract( return 
transformIndexIntoEntityName( opContext.getSearchContext().getIndexConvention(), SearchRequestHandler.getBuilder( - entitySpec, - searchConfiguration, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()) + entitySpec, searchConfiguration, customSearchConfiguration) .extractResult(opContext, searchResponse, filter, from, size)); } catch (Exception e) { log.error("Search query failed", e); @@ -215,10 +212,7 @@ private ScrollResult executeAndExtract( return transformIndexIntoEntityName( opContext.getSearchContext().getIndexConvention(), SearchRequestHandler.getBuilder( - entitySpecs, - searchConfiguration, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()) + entitySpecs, searchConfiguration, customSearchConfiguration) .extractScrollResult( opContext, searchResponse, filter, keepAlive, size, supportsPointInTime())); } catch (Exception e) { @@ -261,11 +255,7 @@ public SearchResult search( Filter transformedFilters = transformFilterForEntities(postFilters, indexConvention); // Step 1: construct the query final SearchRequest searchRequest = - SearchRequestHandler.getBuilder( - entitySpecs, - searchConfiguration, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()) + SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) .getSearchRequest( opContext, finalInput, transformedFilters, sortCriterion, from, size, facets); searchRequest.indices( @@ -298,11 +288,7 @@ public SearchResult filter( EntitySpec entitySpec = opContext.getEntityRegistry().getEntitySpec(entityName); Filter transformedFilters = transformFilterForEntities(filters, indexConvention); final SearchRequest searchRequest = - SearchRequestHandler.getBuilder( - entitySpec, - searchConfiguration, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()) + SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, 
customSearchConfiguration) .getFilterRequest(opContext, transformedFilters, sortCriterion, from, size); searchRequest.indices(indexConvention.getIndexName(entitySpec)); @@ -335,10 +321,7 @@ public AutoCompleteResult autoComplete( EntitySpec entitySpec = opContext.getEntityRegistry().getEntitySpec(entityName); IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention(); AutocompleteRequestHandler builder = - AutocompleteRequestHandler.getBuilder( - entitySpec, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()); + AutocompleteRequestHandler.getBuilder(entitySpec, customSearchConfiguration); SearchRequest req = builder.getSearchRequest( opContext, @@ -383,11 +366,7 @@ public Map aggregateByValue( } IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention(); final SearchRequest searchRequest = - SearchRequestHandler.getBuilder( - entitySpecs, - searchConfiguration, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()) + SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) .getAggregationRequest( opContext, field, @@ -502,10 +481,7 @@ private SearchRequest getScrollRequest( } return SearchRequestHandler.getBuilder( - entitySpecs, - searchConfiguration, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()) + entitySpecs, searchConfiguration, customSearchConfiguration) .getSearchRequest( opContext, finalInput, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java index 971cd7298639d..c6abc7c261f3d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java @@ -1,10 +1,11 @@ package com.linkedin.metadata.search.elasticsearch.query.request; import static com.linkedin.metadata.Constants.*; -import static com.linkedin.metadata.search.utils.ESUtils.toFacetField; +import static com.linkedin.metadata.search.utils.ESUtils.toParentField; import static com.linkedin.metadata.utils.SearchUtil.*; import com.linkedin.data.template.LongMap; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.StructuredPropertyUtils; @@ -135,15 +136,16 @@ private AggregationBuilder facetToAggregationBuilder( opContext.getSearchContext().getSearchFlags().getMaxAggValues(), configs.getMaxTermBucketSize()); for (int i = facets.size() - 1; i >= 0; i--) { - String facet = facets.get(i); - if (facet.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) { - String structPropFqn = facet.substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1); - StructuredPropertyUtils.validateStructuredPropertyFQN( - Set.of(structPropFqn), opContext.getRetrieverContext().get().getAspectRetriever()); - facet = - STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX - + StructuredPropertyUtils.sanitizeStructuredPropertyFQN(structPropFqn); - } + String facet = + StructuredPropertyUtils.lookupDefinitionFromFilterOrFacetName( + facets.get(i), opContext.getAspectRetriever()) + .map( + urnDefinition -> + STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX + + StructuredPropertyUtils.toElasticsearchFieldName( + urnDefinition.getFirst(), urnDefinition.getSecond())) + .orElse(facets.get(i)); + AggregationBuilder aggBuilder; if (facet.contains(AGGREGATION_SPECIAL_TYPE_DELIMITER)) { List specialTypeFields = List.of(facet.split(AGGREGATION_SPECIAL_TYPE_DELIMITER)); @@ -152,9 +154,11 @@ private AggregationBuilder facetToAggregationBuilder( aggBuilder 
= INDEX_VIRTUAL_FIELD.equalsIgnoreCase(specialTypeFields.get(1)) ? AggregationBuilders.missing(inputFacet) - .field(getAggregationField(ES_INDEX_FIELD)) + .field(getAggregationField(ES_INDEX_FIELD, opContext.getAspectRetriever())) : AggregationBuilders.missing(inputFacet) - .field(getAggregationField(specialTypeFields.get(1))); + .field( + getAggregationField( + specialTypeFields.get(1), opContext.getAspectRetriever())); break; default: throw new UnsupportedOperationException( @@ -164,11 +168,11 @@ private AggregationBuilder facetToAggregationBuilder( aggBuilder = facet.equalsIgnoreCase(INDEX_VIRTUAL_FIELD) ? AggregationBuilders.terms(inputFacet) - .field(getAggregationField(ES_INDEX_FIELD)) + .field(getAggregationField(ES_INDEX_FIELD, opContext.getAspectRetriever())) .size(maxTermBuckets) .minDocCount(0) : AggregationBuilders.terms(inputFacet) - .field(getAggregationField(facet)) + .field(getAggregationField(facet, opContext.getAspectRetriever())) .size(maxTermBuckets); } if (lastAggBuilder != null) { @@ -180,13 +184,14 @@ private AggregationBuilder facetToAggregationBuilder( return lastAggBuilder; } - private String getAggregationField(final String facet) { + private String getAggregationField( + final String facet, @Nullable AspectRetriever aspectRetriever) { if (facet.startsWith("has")) { // Boolean hasX field, not a keyword field. Return the name of the original facet. return facet; } // Otherwise assume that this field is of keyword type. 
- return ESUtils.toKeywordField(facet, false); + return ESUtils.toKeywordField(facet, false, aspectRetriever); } List getDefaultFacetFieldsFromAnnotation(final SearchableAnnotation annotation) { @@ -229,10 +234,12 @@ private String computeDisplayName(String name) { } List extractAggregationMetadata( - @Nonnull SearchResponse searchResponse, @Nullable Filter filter) { + @Nonnull SearchResponse searchResponse, + @Nullable Filter filter, + @Nullable AspectRetriever aspectRetriever) { final List aggregationMetadataList = new ArrayList<>(); if (searchResponse.getAggregations() == null) { - return addFiltersToAggregationMetadata(aggregationMetadataList, filter); + return addFiltersToAggregationMetadata(aggregationMetadataList, filter, aspectRetriever); } for (Map.Entry entry : searchResponse.getAggregations().getAsMap().entrySet()) { @@ -243,7 +250,7 @@ List extractAggregationMetadata( processMissingAggregations(entry, aggregationMetadataList); } } - return addFiltersToAggregationMetadata(aggregationMetadataList, filter); + return addFiltersToAggregationMetadata(aggregationMetadataList, filter, aspectRetriever); } private void processTermAggregations( @@ -343,38 +350,45 @@ private static Map extractTermAggregations( /** Injects the missing conjunctive filters into the aggregations list. 
*/ public List addFiltersToAggregationMetadata( - @Nonnull final List originalMetadata, @Nullable final Filter filter) { + @Nonnull final List originalMetadata, + @Nullable final Filter filter, + @Nullable AspectRetriever aspectRetriever) { if (filter == null) { return originalMetadata; } if (filter.getOr() != null) { - addOrFiltersToAggregationMetadata(filter.getOr(), originalMetadata); + addOrFiltersToAggregationMetadata(filter.getOr(), originalMetadata, aspectRetriever); } else if (filter.getCriteria() != null) { - addCriteriaFiltersToAggregationMetadata(filter.getCriteria(), originalMetadata); + addCriteriaFiltersToAggregationMetadata( + filter.getCriteria(), originalMetadata, aspectRetriever); } return originalMetadata; } void addOrFiltersToAggregationMetadata( @Nonnull final ConjunctiveCriterionArray or, - @Nonnull final List originalMetadata) { + @Nonnull final List originalMetadata, + @Nullable AspectRetriever aspectRetriever) { for (ConjunctiveCriterion conjunction : or) { // For each item in the conjunction, inject an empty aggregation if necessary - addCriteriaFiltersToAggregationMetadata(conjunction.getAnd(), originalMetadata); + addCriteriaFiltersToAggregationMetadata( + conjunction.getAnd(), originalMetadata, aspectRetriever); } } private void addCriteriaFiltersToAggregationMetadata( @Nonnull final CriterionArray criteria, - @Nonnull final List originalMetadata) { + @Nonnull final List originalMetadata, + @Nullable AspectRetriever aspectRetriever) { for (Criterion criterion : criteria) { - addCriterionFiltersToAggregationMetadata(criterion, originalMetadata); + addCriterionFiltersToAggregationMetadata(criterion, originalMetadata, aspectRetriever); } } private void addCriterionFiltersToAggregationMetadata( @Nonnull final Criterion criterion, - @Nonnull final List aggregationMetadata) { + @Nonnull final List aggregationMetadata, + @Nullable AspectRetriever aspectRetriever) { // We should never see duplicate aggregation for the same field in aggregation 
metadata list. final Map aggregationMetadataMap = @@ -382,7 +396,7 @@ private void addCriterionFiltersToAggregationMetadata( .collect(Collectors.toMap(AggregationMetadata::getName, agg -> agg)); // Map a filter criterion to a facet field (e.g. domains.keyword -> domains) - final String finalFacetField = toFacetField(criterion.getField()); + final String finalFacetField = toParentField(criterion.getField(), aspectRetriever); if (finalFacetField == null) { log.warn( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java index 37a7e5adde2dc..8ee9587ca2ae4 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java @@ -8,7 +8,6 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringArray; -import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.custom.AutocompleteConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.config.search.custom.QueryConfiguration; @@ -52,16 +51,13 @@ public class AutocompleteRequestHandler { private static final Map AUTOCOMPLETE_QUERY_BUILDER_BY_ENTITY_NAME = new ConcurrentHashMap<>(); - private final AspectRetriever aspectRetriever; - private final CustomizedQueryHandler customizedQueryHandler; private final EntitySpec entitySpec; public AutocompleteRequestHandler( @Nonnull EntitySpec entitySpec, - @Nullable CustomSearchConfiguration customSearchConfiguration, - @Nonnull AspectRetriever aspectRetriever) { + @Nullable CustomSearchConfiguration customSearchConfiguration) { this.entitySpec = 
entitySpec; List fieldSpecs = entitySpec.getSearchableFieldSpecs(); this.customizedQueryHandler = CustomizedQueryHandler.builder(customSearchConfiguration).build(); @@ -87,17 +83,13 @@ public AutocompleteRequestHandler( set1.addAll(set2); return set1; })); - this.aspectRetriever = aspectRetriever; } public static AutocompleteRequestHandler getBuilder( @Nonnull EntitySpec entitySpec, - @Nullable CustomSearchConfiguration customSearchConfiguration, - @Nonnull AspectRetriever aspectRetriever) { + @Nullable CustomSearchConfiguration customSearchConfiguration) { return AUTOCOMPLETE_QUERY_BUILDER_BY_ENTITY_NAME.computeIfAbsent( - entitySpec, - k -> - new AutocompleteRequestHandler(entitySpec, customSearchConfiguration, aspectRetriever)); + entitySpec, k -> new AutocompleteRequestHandler(entitySpec, customSearchConfiguration)); } public SearchRequest getSearchRequest( @@ -120,7 +112,8 @@ public SearchRequest getSearchRequest( // Initial query with input filters BoolQueryBuilder filterQuery = - ESUtils.buildFilterQuery(filter, false, searchableFieldTypes, aspectRetriever); + ESUtils.buildFilterQuery( + filter, false, searchableFieldTypes, opContext.getAspectRetriever()); baseQuery.filter(filterQuery); // Add autocomplete query diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java index 4835ebe164e1c..6a1f24a92e4a3 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java @@ -7,6 +7,7 @@ import static com.linkedin.metadata.search.elasticsearch.query.request.CustomizedQueryHandler.unquote; import com.google.common.annotations.VisibleForTesting; +import com.linkedin.metadata.aspect.AspectRetriever; import 
com.linkedin.metadata.config.search.ExactMatchConfiguration; import com.linkedin.metadata.config.search.PartialConfiguration; import com.linkedin.metadata.config.search.SearchConfiguration; @@ -109,7 +110,11 @@ private QueryBuilder buildInternalQuery( getSimpleQuery(opContext.getEntityRegistry(), customQueryConfig, entitySpecs, sanitizedQuery) .ifPresent(finalQuery::should); getPrefixAndExactMatchQuery( - opContext.getEntityRegistry(), customQueryConfig, entitySpecs, sanitizedQuery) + opContext.getEntityRegistry(), + customQueryConfig, + entitySpecs, + sanitizedQuery, + opContext.getAspectRetriever()) .ifPresent(finalQuery::should); } else { final String withoutQueryPrefix = @@ -121,7 +126,11 @@ private QueryBuilder buildInternalQuery( .ifPresent(finalQuery::should); if (exactMatchConfiguration.isEnableStructured()) { getPrefixAndExactMatchQuery( - opContext.getEntityRegistry(), customQueryConfig, entitySpecs, withoutQueryPrefix) + opContext.getEntityRegistry(), + customQueryConfig, + entitySpecs, + withoutQueryPrefix, + opContext.getAspectRetriever()) .ifPresent(finalQuery::should); } } @@ -369,7 +378,8 @@ private Optional getPrefixAndExactMatchQuery( @Nonnull EntityRegistry entityRegistry, @Nullable QueryConfiguration customQueryConfig, @Nonnull List entitySpecs, - String query) { + String query, + @Nullable AspectRetriever aspectRetriever) { final boolean isPrefixQuery = customQueryConfig == null @@ -408,7 +418,8 @@ private Optional getPrefixAndExactMatchQuery( if (caseSensitivityEnabled) { finalQuery.should( QueryBuilders.termQuery( - ESUtils.toKeywordField(searchFieldConfig.fieldName(), false), + ESUtils.toKeywordField( + searchFieldConfig.fieldName(), false, aspectRetriever), unquotedQuery) .caseInsensitive(false) .boost( @@ -419,7 +430,8 @@ private Optional getPrefixAndExactMatchQuery( // Exact match case-insensitive finalQuery.should( QueryBuilders.termQuery( - ESUtils.toKeywordField(searchFieldConfig.fieldName(), false), + ESUtils.toKeywordField( + 
searchFieldConfig.fieldName(), false, aspectRetriever), unquotedQuery) .caseInsensitive(true) .boost( @@ -432,7 +444,8 @@ private Optional getPrefixAndExactMatchQuery( if (searchFieldConfig.isWordGramSubfield() && isPrefixQuery) { finalQuery.should( QueryBuilders.matchPhraseQuery( - ESUtils.toKeywordField(searchFieldConfig.fieldName(), false), + ESUtils.toKeywordField( + searchFieldConfig.fieldName(), false, aspectRetriever), unquotedQuery) .boost( searchFieldConfig.boost() diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index dfb33897bcf4a..66ad1e3be363f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -8,7 +8,6 @@ import com.google.common.collect.ImmutableMap; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.DoubleMap; -import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.EntitySpec; @@ -117,8 +116,7 @@ private SearchRequestHandler( public static SearchRequestHandler getBuilder( @Nonnull EntitySpec entitySpec, @Nonnull SearchConfiguration configs, - @Nullable CustomSearchConfiguration customSearchConfiguration, - @Nonnull AspectRetriever aspectRetriever) { + @Nullable CustomSearchConfiguration customSearchConfiguration) { return REQUEST_HANDLER_BY_ENTITY_NAME.computeIfAbsent( ImmutableList.of(entitySpec), k -> new SearchRequestHandler(entitySpec, configs, customSearchConfiguration)); @@ -127,8 +125,7 @@ public static SearchRequestHandler getBuilder( public static SearchRequestHandler getBuilder( 
@Nonnull List entitySpecs, @Nonnull SearchConfiguration configs, - @Nullable CustomSearchConfiguration customSearchConfiguration, - @Nonnull AspectRetriever aspectRetriever) { + @Nullable CustomSearchConfiguration customSearchConfiguration) { return REQUEST_HANDLER_BY_ENTITY_NAME.computeIfAbsent( ImmutableList.copyOf(entitySpecs), k -> new SearchRequestHandler(entitySpecs, configs, customSearchConfiguration)); @@ -167,10 +164,7 @@ public static BoolQueryBuilder getFilterQuery( Map> searchableFieldTypes) { BoolQueryBuilder filterQuery = ESUtils.buildFilterQuery( - filter, - false, - searchableFieldTypes, - opContext.getRetrieverContext().get().getAspectRetriever()); + filter, false, searchableFieldTypes, opContext.getAspectRetriever()); return applyDefaultSearchFilters(opContext, filter, filterQuery); } @@ -337,7 +331,9 @@ public SearchRequest getAggregationRequest( searchSourceBuilder.query(filterQuery); searchSourceBuilder.size(0); searchSourceBuilder.aggregation( - AggregationBuilders.terms(field).field(ESUtils.toKeywordField(field, false)).size(limit)); + AggregationBuilders.terms(field) + .field(ESUtils.toKeywordField(field, false, opContext.getAspectRetriever())) + .size(limit)); searchRequest.source(searchSourceBuilder); return searchRequest; @@ -528,7 +524,8 @@ private SearchResultMetadata extractSearchResultMetadata( if (Boolean.FALSE.equals(searchFlags.isSkipAggregates())) { final List aggregationMetadataList = - aggregationQueryBuilder.extractAggregationMetadata(searchResponse, filter); + aggregationQueryBuilder.extractAggregationMetadata( + searchResponse, filter, opContext.getAspectRetriever()); searchResultMetadata.setAggregations(new AggregationMetadataArray(aggregationMetadataList)); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java index a655f90597e20..dd36f0a9456a7 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java @@ -1,7 +1,7 @@ package com.linkedin.metadata.search.transformer; import static com.linkedin.metadata.Constants.*; -import static com.linkedin.metadata.models.StructuredPropertyUtils.sanitizeStructuredPropertyFQN; +import static com.linkedin.metadata.models.StructuredPropertyUtils.toElasticsearchFieldName; import static com.linkedin.metadata.models.annotation.SearchableAnnotation.OBJECT_FIELD_TYPES; import static com.linkedin.metadata.search.elasticsearch.indexbuilder.MappingsBuilder.SYSTEM_CREATED_FIELD; @@ -19,7 +19,6 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.AspectRetriever; -import com.linkedin.metadata.aspect.validation.StructuredPropertiesValidator; import com.linkedin.metadata.entity.EntityUtils; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; @@ -27,6 +26,7 @@ import com.linkedin.metadata.models.SearchScoreFieldSpec; import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.SearchableRefFieldSpec; +import com.linkedin.metadata.models.StructuredPropertyUtils; import com.linkedin.metadata.models.annotation.SearchableAnnotation.FieldType; import com.linkedin.metadata.models.extractor.FieldExtractor; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -38,12 +38,15 @@ import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; +import java.util.stream.StreamSupport; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.Setter; import 
lombok.extern.slf4j.Slf4j; @@ -109,6 +112,37 @@ public static ObjectNode withSystemCreated( return searchDocument; } + /** + * Handle object type UPSERTS where the new value to upsert removes a previous key. Only enabling + * for structured properties to start with i.e. + * + *

New => { "structuredProperties.foobar": "value1" } Old => { "structuredProperties.foobar": + * "value1" "structuredProperties.foobar2": "value2" } Expected => { + * "structuredProperties.foobar": "value1" "structuredProperties.foobar2": null } + * + * @param searchDocument new document + * @param previousSearchDocument previous document (if not present, no-op) + * @return searchDocument to upsert + */ + public static ObjectNode handleRemoveFields( + @Nonnull ObjectNode searchDocument, @Nullable ObjectNode previousSearchDocument) { + if (previousSearchDocument != null) { + Set documentFields = objectFieldsFilter(searchDocument.fieldNames()); + objectFieldsFilter(previousSearchDocument.fieldNames()).stream() + .filter(prevFieldName -> !documentFields.contains(prevFieldName)) + .forEach(removeFieldName -> searchDocument.set(removeFieldName, null)); + } + // no-op + return searchDocument; + } + + private static Set objectFieldsFilter(Iterator fieldNames) { + Iterable iterable = () -> fieldNames; + return StreamSupport.stream(iterable.spliterator(), false) + .filter(fieldName -> fieldName.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) + .collect(Collectors.toSet()); + } + public Optional transformAspect( @Nonnull OperationContext opContext, final @Nonnull Urn urn, @@ -388,25 +422,28 @@ private void setStructuredPropertiesSearchValue( .entrySet() .forEach( propertyEntry -> { - StructuredPropertyDefinition definition = - new StructuredPropertyDefinition( - definitions - .get(propertyEntry.getKey()) - .get(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) - .data()); + Optional definition = + Optional.ofNullable( + definitions + .get(propertyEntry.getKey()) + .get(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) + .map(def -> new StructuredPropertyDefinition(def.data())); + + LogicalValueType logicalValueType = + definition + .map(StructuredPropertyUtils::getLogicalValueType) + .orElse(LogicalValueType.UNKNOWN); String fieldName = String.join( ".", List.of( 
STRUCTURED_PROPERTY_MAPPING_FIELD, - sanitizeStructuredPropertyFQN(definition.getQualifiedName()))); + toElasticsearchFieldName( + propertyEntry.getKey(), definition.orElse(null)))); if (forDelete) { searchDocument.set(fieldName, JsonNodeFactory.instance.nullNode()); } else { - LogicalValueType logicalValueType = - StructuredPropertiesValidator.getLogicalValueType(definition.getValueType()); - ArrayNode arrayNode = JsonNodeFactory.instance.arrayNode(); propertyEntry @@ -487,7 +524,7 @@ private Optional getNodeForRef( final Object fieldValue, final FieldType fieldType) { EntityRegistry entityRegistry = opContext.getEntityRegistry(); - AspectRetriever aspectRetriever = opContext.getRetrieverContext().get().getAspectRetriever(); + AspectRetriever aspectRetriever = opContext.getAspectRetriever(); if (depth == 0) { if (fieldValue.toString().isEmpty()) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index a6381b07c087c..e299dde62b184 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -2,6 +2,7 @@ import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.models.annotation.SearchableAnnotation.OBJECT_FIELD_TYPES; +import static com.linkedin.metadata.search.elasticsearch.indexbuilder.MappingsBuilder.SUBFIELDS; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig.KEYWORD_FIELDS; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig.PATH_HIERARCHY_FIELDS; import static com.linkedin.metadata.search.utils.SearchUtils.isUrn; @@ -50,7 +51,6 @@ /** TODO: Add more robust unit tests for this critical class. 
*/ @Slf4j public class ESUtils { - private static final String DEFAULT_SEARCH_RESULTS_SORT_BY_FIELD = "urn"; public static final String KEYWORD_ANALYZER = "keyword"; public static final String KEYWORD_SUFFIX = ".keyword"; @@ -140,7 +140,7 @@ public static BoolQueryBuilder buildFilterQuery( @Nullable Filter filter, boolean isTimeseries, final Map> searchableFieldTypes, - @Nonnull AspectRetriever aspectRetriever) { + @Nullable AspectRetriever aspectRetriever) { BoolQueryBuilder finalQueryBuilder = QueryBuilders.boolQuery(); if (filter == null) { return finalQueryBuilder; @@ -155,7 +155,8 @@ public static BoolQueryBuilder buildFilterQuery( .forEach( or -> finalQueryBuilder.should( - ESUtils.buildConjunctiveFilterQuery(or, isTimeseries, searchableFieldTypes))); + ESUtils.buildConjunctiveFilterQuery( + or, isTimeseries, searchableFieldTypes, aspectRetriever))); // The default is not always 1 (ensure consistent default) finalQueryBuilder.minimumShouldMatch(1); } else if (filter.getCriteria() != null) { @@ -170,7 +171,8 @@ public static BoolQueryBuilder buildFilterQuery( || criterion.hasValues() || criterion.getCondition() == Condition.IS_NULL) { andQueryBuilder.must( - getQueryBuilderFromCriterion(criterion, isTimeseries, searchableFieldTypes)); + getQueryBuilderFromCriterion( + criterion, isTimeseries, searchableFieldTypes, aspectRetriever)); } }); finalQueryBuilder.should(andQueryBuilder); @@ -184,7 +186,8 @@ public static BoolQueryBuilder buildFilterQuery( public static BoolQueryBuilder buildConjunctiveFilterQuery( @Nonnull ConjunctiveCriterion conjunctiveCriterion, boolean isTimeseries, - Map> searchableFieldTypes) { + Map> searchableFieldTypes, + @Nullable AspectRetriever aspectRetriever) { final BoolQueryBuilder andQueryBuilder = new BoolQueryBuilder(); conjunctiveCriterion .getAnd() @@ -196,10 +199,12 @@ public static BoolQueryBuilder buildConjunctiveFilterQuery( if (!criterion.isNegated()) { // `filter` instead of `must` (enables caching and bypasses scoring) 
andQueryBuilder.filter( - getQueryBuilderFromCriterion(criterion, isTimeseries, searchableFieldTypes)); + getQueryBuilderFromCriterion( + criterion, isTimeseries, searchableFieldTypes, aspectRetriever)); } else { andQueryBuilder.mustNot( - getQueryBuilderFromCriterion(criterion, isTimeseries, searchableFieldTypes)); + getQueryBuilderFromCriterion( + criterion, isTimeseries, searchableFieldTypes, aspectRetriever)); } } }); @@ -237,11 +242,9 @@ public static BoolQueryBuilder buildConjunctiveFilterQuery( public static QueryBuilder getQueryBuilderFromCriterion( @Nonnull final Criterion criterion, boolean isTimeseries, - final Map> searchableFieldTypes) { - final String fieldName = toFacetField(criterion.getField()); - if (fieldName.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD)) { - criterion.setField(fieldName); - } + final Map> searchableFieldTypes, + @Nullable AspectRetriever aspectRetriever) { + final String fieldName = toParentField(criterion.getField(), aspectRetriever); /* * Check the field-name for a "sibling" field, or one which should ALWAYS @@ -256,11 +259,11 @@ public static QueryBuilder getQueryBuilderFromCriterion( if (maybeFieldToExpand.isPresent()) { return getQueryBuilderFromCriterionForFieldToExpand( - maybeFieldToExpand.get(), criterion, isTimeseries, searchableFieldTypes); + maybeFieldToExpand.get(), criterion, isTimeseries, searchableFieldTypes, aspectRetriever); } return getQueryBuilderFromCriterionForSingleField( - criterion, isTimeseries, searchableFieldTypes); + criterion, isTimeseries, searchableFieldTypes, criterion.getField(), aspectRetriever); } public static String getElasticTypeForFieldType(SearchableAnnotation.FieldType fieldType) { @@ -398,28 +401,64 @@ public static String escapeReservedCharacters(@Nonnull String input) { return input; } + /** + * Resolve structured property field, or normal field, and strip subfields + * + * @param filterField name of the field used in the filter request + * @param aspectRetriever aspect retriever, 
used if structured property + * @return normalized field name without subfields + */ @Nonnull - public static String toFacetField(@Nonnull final String filterField) { - String fieldName = filterField; - if (fieldName.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD + ".")) { - String fqn = fieldName.substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1); - fieldName = - STRUCTURED_PROPERTY_MAPPING_FIELD - + "." - + StructuredPropertyUtils.sanitizeStructuredPropertyFQN(fqn); + public static String toParentField( + @Nonnull final String filterField, @Nullable final AspectRetriever aspectRetriever) { + String fieldName = + StructuredPropertyUtils.lookupDefinitionFromFilterOrFacetName(filterField, aspectRetriever) + .map( + urnDefinition -> + STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX + + StructuredPropertyUtils.toElasticsearchFieldName( + urnDefinition.getFirst(), urnDefinition.getSecond())) + .orElse(filterField); + + for (String subfield : SUBFIELDS) { + String SUFFIX = "." + subfield; + if (filterField.endsWith(SUFFIX)) { + return fieldName.replace(SUFFIX, ""); + } } - return fieldName.replace(ESUtils.KEYWORD_SUFFIX, ""); + + return fieldName; } + /** + * Return resolved structured property field, normal field, or subfield which is of type `keyword` + * + * @param filterField the field name used in the filter + * @param skipKeywordSuffix prevent use of `keyword` subfield, useful when parent field is known + * or always `keyword` + * @param aspectRetriever aspect retriever, used if structured property field + * @return the preferred field to use for `keyword` queries + */ @Nonnull public static String toKeywordField( - @Nonnull final String filterField, final boolean skipKeywordSuffix) { + @Nonnull final String filterField, + final boolean skipKeywordSuffix, + @Nullable final AspectRetriever aspectRetriever) { + String fieldName = + StructuredPropertyUtils.lookupDefinitionFromFilterOrFacetName(filterField, aspectRetriever) + .map( + urnDefinition -> + 
STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX + + StructuredPropertyUtils.toElasticsearchFieldName( + urnDefinition.getFirst(), urnDefinition.getSecond())) + .orElse(filterField); + return skipKeywordSuffix - || KEYWORD_FIELDS.contains(filterField) - || PATH_HIERARCHY_FIELDS.contains(filterField) - || filterField.contains(".") - ? filterField - : filterField + ESUtils.KEYWORD_SUFFIX; + || KEYWORD_FIELDS.contains(fieldName) + || PATH_HIERARCHY_FIELDS.contains(fieldName) + || SUBFIELDS.stream().anyMatch(subfield -> fieldName.endsWith("." + subfield)) + ? fieldName + : fieldName + ESUtils.KEYWORD_SUFFIX; } public static RequestOptions buildReindexTaskRequestOptions( @@ -464,7 +503,8 @@ private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand( @Nonnull final List fields, @Nonnull final Criterion criterion, final boolean isTimeseries, - final Map> searchableFieldTypes) { + final Map> searchableFieldTypes, + @Nonnull AspectRetriever aspectRetriever) { final BoolQueryBuilder orQueryBuilder = new BoolQueryBuilder(); for (String field : fields) { Criterion criterionToQuery = new Criterion(); @@ -476,10 +516,11 @@ private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand( if (criterion.hasValue()) { criterionToQuery.setValue(criterion.getValue()); } - criterionToQuery.setField(toKeywordField(field, isTimeseries)); + criterionToQuery.setField(toKeywordField(field, isTimeseries, aspectRetriever)); orQueryBuilder.should( getQueryBuilderFromCriterionForSingleField( - criterionToQuery, isTimeseries, searchableFieldTypes)); + criterionToQuery, isTimeseries, searchableFieldTypes, null, aspectRetriever) + .queryName(field)); } return orQueryBuilder; } @@ -488,40 +529,49 @@ private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand( private static QueryBuilder getQueryBuilderFromCriterionForSingleField( @Nonnull Criterion criterion, boolean isTimeseries, - final Map> searchableFieldTypes) { + final Map> searchableFieldTypes, + @Nullable String 
queryName, + @Nonnull AspectRetriever aspectRetriever) { final Condition condition = criterion.getCondition(); - final String fieldName = toFacetField(criterion.getField()); + final String fieldName = toParentField(criterion.getField(), aspectRetriever); if (condition == Condition.IS_NULL) { return QueryBuilders.boolQuery() - .mustNot(QueryBuilders.existsQuery(criterion.getField())) - .queryName(fieldName); + .mustNot(QueryBuilders.existsQuery(fieldName)) + .queryName(queryName != null ? queryName : fieldName); } else if (condition == Condition.EXISTS) { return QueryBuilders.boolQuery() - .must(QueryBuilders.existsQuery(criterion.getField())) - .queryName(fieldName); + .must(QueryBuilders.existsQuery(fieldName)) + .queryName(queryName != null ? queryName : fieldName); } else if (criterion.hasValues() || criterion.hasValue()) { if (condition == Condition.EQUAL) { return buildEqualsConditionFromCriterion( - fieldName, criterion, isTimeseries, searchableFieldTypes); + fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever) + .queryName(queryName != null ? queryName : fieldName); } else if (RANGE_QUERY_CONDITIONS.contains(condition)) { return buildRangeQueryFromCriterion( - criterion, fieldName, searchableFieldTypes, condition, isTimeseries); + criterion, + fieldName, + searchableFieldTypes, + condition, + isTimeseries, + aspectRetriever) + .queryName(queryName != null ? queryName : fieldName); } else if (condition == Condition.CONTAIN) { return QueryBuilders.wildcardQuery( - toKeywordField(criterion.getField(), isTimeseries), + toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), "*" + ESUtils.escapeReservedCharacters(criterion.getValue().trim()) + "*") - .queryName(fieldName); + .queryName(queryName != null ? 
queryName : fieldName); } else if (condition == Condition.START_WITH) { return QueryBuilders.wildcardQuery( - toKeywordField(criterion.getField(), isTimeseries), + toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), ESUtils.escapeReservedCharacters(criterion.getValue().trim()) + "*") - .queryName(fieldName); + .queryName(queryName != null ? queryName : fieldName); } else if (condition == Condition.END_WITH) { return QueryBuilders.wildcardQuery( - toKeywordField(criterion.getField(), isTimeseries), + toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), "*" + ESUtils.escapeReservedCharacters(criterion.getValue().trim())) - .queryName(fieldName); + .queryName(queryName != null ? queryName : fieldName); } } throw new UnsupportedOperationException("Unsupported condition: " + condition); @@ -531,20 +581,21 @@ private static QueryBuilder buildEqualsConditionFromCriterion( @Nonnull final String fieldName, @Nonnull final Criterion criterion, final boolean isTimeseries, - final Map> searchableFieldTypes) { + final Map> searchableFieldTypes, + @Nonnull AspectRetriever aspectRetriever) { /* * If the newer 'values' field of Criterion.pdl is set, then we * handle using the following code to allow multi-match. */ if (!criterion.getValues().isEmpty()) { return buildEqualsConditionFromCriterionWithValues( - fieldName, criterion, isTimeseries, searchableFieldTypes); + fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever); } /* * Otherwise, we are likely using the deprecated 'value' field. * We handle using the legacy code path below. 
*/ - return buildEqualsFromCriterionWithValue(fieldName, criterion, isTimeseries); + return buildEqualsFromCriterionWithValue(fieldName, criterion, isTimeseries, aspectRetriever); } /** @@ -555,8 +606,9 @@ private static QueryBuilder buildEqualsConditionFromCriterionWithValues( @Nonnull final String fieldName, @Nonnull final Criterion criterion, final boolean isTimeseries, - final Map> searchableFieldTypes) { - Set fieldTypes = getFieldTypes(searchableFieldTypes, fieldName); + final Map> searchableFieldTypes, + @Nonnull AspectRetriever aspectRetriever) { + Set fieldTypes = getFieldTypes(searchableFieldTypes, fieldName, aspectRetriever); if (fieldTypes.size() > 1) { log.warn( "Multiple field types for field name {}, determining best fit for set: {}", @@ -576,21 +628,32 @@ private static QueryBuilder buildEqualsConditionFromCriterionWithValues( return QueryBuilders.termsQuery(fieldName, doubleValues).queryName(fieldName); } return QueryBuilders.termsQuery( - toKeywordField(criterion.getField(), isTimeseries), criterion.getValues()) + toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), + criterion.getValues()) .queryName(fieldName); } private static Set getFieldTypes( - Map> searchableFields, String fieldName) { - Set fieldTypes = - searchableFields.getOrDefault(fieldName, Collections.emptySet()); - Set finalFieldTypes = - fieldTypes.stream().map(ESUtils::getElasticTypeForFieldType).collect(Collectors.toSet()); - if (fieldTypes.size() > 1) { + Map> searchableFields, + String fieldName, + @Nullable AspectRetriever aspectRetriever) { + + final Set finalFieldTypes; + if (fieldName.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) { + finalFieldTypes = + StructuredPropertyUtils.toElasticsearchFieldType(fieldName, aspectRetriever); + } else { + Set fieldTypes = + searchableFields.getOrDefault(fieldName, Collections.emptySet()); + finalFieldTypes = + fieldTypes.stream().map(ESUtils::getElasticTypeForFieldType).collect(Collectors.toSet()); + } + + if 
(finalFieldTypes.size() > 1) { log.warn( "Multiple field types for field name {}, determining best fit for set: {}", fieldName, - fieldTypes); + finalFieldTypes); } return finalFieldTypes; } @@ -600,8 +663,9 @@ private static RangeQueryBuilder buildRangeQueryFromCriterion( String fieldName, Map> searchableFieldTypes, Condition condition, - boolean isTimeseries) { - Set fieldTypes = getFieldTypes(searchableFieldTypes, fieldName); + boolean isTimeseries, + AspectRetriever aspectRetriever) { + Set fieldTypes = getFieldTypes(searchableFieldTypes, fieldName, aspectRetriever); // Determine criterion value, range query only accepts single value so take first value in // values if multiple @@ -624,7 +688,7 @@ private static RangeQueryBuilder buildRangeQueryFromCriterion( documentFieldName = fieldName; } else { criterionValue = criterionValueString; - documentFieldName = toKeywordField(fieldName, isTimeseries); + documentFieldName = toKeywordField(fieldName, isTimeseries, aspectRetriever); } // Set up QueryBuilder based on condition @@ -653,12 +717,14 @@ private static RangeQueryBuilder buildRangeQueryFromCriterion( private static QueryBuilder buildEqualsFromCriterionWithValue( @Nonnull final String fieldName, @Nonnull final Criterion criterion, - final boolean isTimeseries) { + final boolean isTimeseries, + @Nonnull AspectRetriever aspectRetriever) { // If the value is an URN style value, then we do not attempt to split it by comma (for obvious // reasons) if (isUrn(criterion.getValue())) { return QueryBuilders.matchQuery( - toKeywordField(criterion.getField(), isTimeseries), criterion.getValue().trim()) + toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), + criterion.getValue().trim()) .queryName(fieldName) .analyzer(KEYWORD_ANALYZER); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java index 
29389f2e66558..ad2825ead3d0d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java @@ -165,7 +165,7 @@ private Callable processBatch( return () -> { StopWatch stopWatch = new StopWatch(); stopWatch.start(); - AspectRetriever aspectRetriever = opContext.getRetrieverContext().get().getAspectRetriever(); + AspectRetriever aspectRetriever = opContext.getAspectRetrieverOpt().get(); log.info("Batch {} for BA:{} started", batchNumber, entityKey); ExecutionResult executionResult = new ExecutionResult(); executionResult.setBatchNumber(batchNumber); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java index da35c9e0b0784..dff0a99a142b7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java @@ -6,6 +6,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; import com.linkedin.common.InputField; @@ -130,8 +131,7 @@ public void handleChangeEvent( @Nonnull OperationContext opContext, @Nonnull final MetadataChangeLog event) { try { MCLItemImpl batch = - MCLItemImpl.builder() - .build(event, opContext.getRetrieverContext().get().getAspectRetriever()); + MCLItemImpl.builder().build(event, opContext.getAspectRetrieverOpt().get()); Stream sideEffects = AspectsBatch.applyMCLSideEffects(List.of(batch), opContext.getRetrieverContext().get()); @@ -187,7 +187,7 @@ private void handleUpdateChangeEvent( } // Step 1. 
Handle StructuredProperties Index Mapping changes - updateIndexMappings(entitySpec, aspectSpec, aspect, previousAspect); + updateIndexMappings(urn, entitySpec, aspectSpec, aspect, previousAspect); // Step 2. For all aspects, attempt to update Search updateSearchService(opContext, event); @@ -206,6 +206,7 @@ private void handleUpdateChangeEvent( } public void updateIndexMappings( + @Nonnull Urn urn, EntitySpec entitySpec, AspectSpec aspectSpec, RecordTemplate newValue, @@ -228,7 +229,7 @@ public void updateIndexMappings( if (newDefinition.getEntityTypes().size() > 0) { _entityIndexBuilders - .buildReindexConfigsWithNewStructProp(newDefinition) + .buildReindexConfigsWithNewStructProp(urn, newDefinition) .forEach( reindexState -> { try { @@ -526,8 +527,8 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev RecordTemplate previousAspect = event.getPreviousRecordTemplate(); String entityName = event.getEntitySpec().getName(); - Optional searchDocument; - Optional previousSearchDocument = Optional.empty(); + Optional searchDocument; + Optional previousSearchDocument = Optional.empty(); try { searchDocument = _searchDocumentTransformer @@ -539,8 +540,7 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev event.getChangeType(), event.getEntitySpec(), aspectSpec, - event.getAuditStamp())) - .map(Objects::toString); + event.getAuditStamp())); } catch (Exception e) { log.error( "Error in getting documents from aspect: {} for aspect {}", e, aspectSpec.getName()); @@ -557,7 +557,6 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev return; } - String searchDocumentValue = searchDocument.get(); if (_searchDiffMode && (systemMetadata == null || systemMetadata.getProperties() == null @@ -565,9 +564,8 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev if (previousAspect != null) { try { previousSearchDocument = - _searchDocumentTransformer - 
.transformAspect(opContext, urn, previousAspect, aspectSpec, false) - .map(Objects::toString); + _searchDocumentTransformer.transformAspect( + opContext, urn, previousAspect, aspectSpec, false); } catch (Exception e) { log.error( "Error in getting documents from previous aspect state: {} for aspect {}, continuing without diffing.", @@ -577,15 +575,19 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev } if (previousSearchDocument.isPresent()) { - String previousSearchDocumentValue = previousSearchDocument.get(); - if (searchDocumentValue.equals(previousSearchDocumentValue)) { + if (searchDocument.get().toString().equals(previousSearchDocument.get().toString())) { // No changes to search document, skip writing no-op update return; } } } - _entitySearchService.upsertDocument(opContext, entityName, searchDocument.get(), docId.get()); + String finalDocument = + SearchDocumentTransformer.handleRemoveFields( + searchDocument.get(), previousSearchDocument.orElse(null)) + .toString(); + + _entitySearchService.upsertDocument(opContext, entityName, finalDocument, docId.get()); } /** Process snapshot and update time-series index */ diff --git a/metadata-io/src/main/java/com/linkedin/metadata/shared/ElasticSearchIndexed.java b/metadata-io/src/main/java/com/linkedin/metadata/shared/ElasticSearchIndexed.java index e894558e3d1af..51d2cee06730c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/shared/ElasticSearchIndexed.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/shared/ElasticSearchIndexed.java @@ -1,31 +1,27 @@ package com.linkedin.metadata.shared; +import com.linkedin.common.urn.Urn; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ReindexConfig; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.io.IOException; import java.util.Collection; import java.util.List; public interface ElasticSearchIndexed { - /** - * The index configurations for the given 
service. - * - * @return List of reindex configurations - */ - List buildReindexConfigs() throws IOException; - /** * The index configurations for the given service with StructuredProperties applied. * * @param properties The structured properties to apply to the index mappings * @return List of reindex configurations */ - List buildReindexConfigsWithAllStructProps( - Collection properties) throws IOException; + List buildReindexConfigs( + Collection> properties) throws IOException; /** * Mirrors the service's functions which are expected to build/reindex as needed based on the * reindex configurations above */ - void reindexAll() throws IOException; + void reindexAll(Collection> properties) + throws IOException; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java new file mode 100644 index 0000000000000..41addbe197f27 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java @@ -0,0 +1,201 @@ +package com.linkedin.metadata.structuredproperties.hooks; + +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_KEY_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.entity.Aspect; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.batch.MCLItem; +import com.linkedin.metadata.aspect.batch.MCPItem; +import com.linkedin.metadata.aspect.patch.GenericJsonPatch; +import 
com.linkedin.metadata.aspect.patch.PatchOperationType; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffect; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.entity.ebean.batch.PatchItemImpl; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.StructuredPropertyUtils; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.structured.StructuredPropertyDefinition; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Getter +@Setter +@Accessors(chain = true) +public class PropertyDefinitionDeleteSideEffect extends MCPSideEffect { + public static final Integer SEARCH_SCROLL_SIZE = 1000; + @Nonnull private AspectPluginConfig config; + + @Override + protected Stream applyMCPSideEffect( + Collection changeMCPS, @Nonnull RetrieverContext retrieverContext) { + return Stream.of(); + } + + @Override + protected Stream postMCPSideEffect( + Collection mclItems, @Nonnull RetrieverContext retrieverContext) { + return mclItems.stream().flatMap(item -> generatePatchRemove(item, retrieverContext)); 
+ } + + private static Stream generatePatchRemove( + MCLItem mclItem, @Nonnull RetrieverContext retrieverContext) { + + if (STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME.equals(mclItem.getAspectName())) { + return generatePatchMCPs( + mclItem.getUrn(), + mclItem.getPreviousAspect(StructuredPropertyDefinition.class), + mclItem.getAuditStamp(), + retrieverContext); + } else if (STRUCTURED_PROPERTY_KEY_ASPECT_NAME.equals(mclItem.getAspectName())) { + Aspect definitionAspect = + retrieverContext + .getAspectRetriever() + .getLatestAspectObject(mclItem.getUrn(), STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME); + return generatePatchMCPs( + mclItem.getUrn(), + definitionAspect == null + ? null + : new StructuredPropertyDefinition(definitionAspect.data()), + mclItem.getAuditStamp(), + retrieverContext); + } + log.warn( + "Expected either {} or {} aspects but got {}", + STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, + STRUCTURED_PROPERTY_KEY_ASPECT_NAME, + mclItem.getAspectName()); + return Stream.empty(); + } + + private static Stream generatePatchMCPs( + Urn propertyUrn, + @Nullable StructuredPropertyDefinition definition, + @Nullable AuditStamp auditStamp, + @Nonnull RetrieverContext retrieverContext) { + EntityWithPropertyIterator iterator = + EntityWithPropertyIterator.builder() + .propertyUrn(propertyUrn) + .definition(definition) + .searchRetriever(retrieverContext.getSearchRetriever()) + .count(SEARCH_SCROLL_SIZE) + .build(); + return StreamSupport.stream( + Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .flatMap( + scrollResult -> + scrollResult.getEntities().stream() + .map( + entity -> { + GenericJsonPatch.PatchOp patchOp = new GenericJsonPatch.PatchOp(); + patchOp.setOp(PatchOperationType.REMOVE.getValue()); + patchOp.setPath(String.format("/properties/%s", propertyUrn.toString())); + + EntitySpec entitySpec = + retrieverContext + .getAspectRetriever() + .getEntityRegistry() + .getEntitySpec(entity.getEntity().getEntityType()); + return 
PatchItemImpl.builder() + .urn(entity.getEntity()) + .entitySpec(entitySpec) + .aspectName(STRUCTURED_PROPERTIES_ASPECT_NAME) + .aspectSpec( + entitySpec.getAspectSpec(STRUCTURED_PROPERTIES_ASPECT_NAME)) + .patch( + GenericJsonPatch.builder() + .arrayPrimaryKeys( + Map.of("properties", List.of("propertyUrn"))) + .patch(List.of(patchOp)) + .build() + .getJsonPatch()) + .auditStamp(auditStamp) + .build(retrieverContext.getAspectRetriever().getEntityRegistry()); + })); + } + + /** + * Fetches pages of entity urns which have a value for the given structured property definition + */ + @Builder + public static class EntityWithPropertyIterator implements Iterator { + @Nonnull private final Urn propertyUrn; + @Nullable private final StructuredPropertyDefinition definition; + @Nonnull private final SearchRetriever searchRetriever; + private int count; + @Builder.Default private String scrollId = null; + @Builder.Default private boolean started = false; + + private List getEntities() { + if (definition != null && definition.getEntityTypes() != null) { + return definition.getEntityTypes().stream() + .map(StructuredPropertyUtils::getValueTypeId) + .collect(Collectors.toList()); + } else { + return Collections.emptyList(); + } + } + + private Filter getFilter() { + Filter propertyFilter = new Filter(); + final ConjunctiveCriterionArray disjunction = new ConjunctiveCriterionArray(); + final ConjunctiveCriterion conjunction = new ConjunctiveCriterion(); + final CriterionArray andCriterion = new CriterionArray(); + + final Criterion propertyExistsCriterion = new Criterion(); + // Cannot rely on automatic field name since the definition is deleted + propertyExistsCriterion.setField( + STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX + + StructuredPropertyUtils.toElasticsearchFieldName(propertyUrn, definition)); + propertyExistsCriterion.setCondition(Condition.EXISTS); + + andCriterion.add(propertyExistsCriterion); + conjunction.setAnd(andCriterion); + disjunction.add(conjunction); + 
propertyFilter.setOr(disjunction); + + return propertyFilter; + } + + @Override + public boolean hasNext() { + return !started || scrollId != null; + } + + @Override + public ScrollResult next() { + started = true; + ScrollResult result = searchRetriever.scroll(getEntities(), getFilter(), scrollId, count); + scrollId = result.getScrollId(); + return result; + } + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDelete.java b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/StructuredPropertiesSoftDelete.java similarity index 96% rename from entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDelete.java rename to metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/StructuredPropertiesSoftDelete.java index 5efb1e8aebb06..99b5832837675 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDelete.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/StructuredPropertiesSoftDelete.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.aspect.hooks; +package com.linkedin.metadata.structuredproperties.hooks; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.aspect.ReadItem; diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/PropertyDefinitionValidator.java similarity index 68% rename from entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java rename to metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/PropertyDefinitionValidator.java index a4efc38d16082..ae5472af622ad 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/PropertyDefinitionValidator.java @@ -1,20 +1,20 @@ -package com.linkedin.metadata.aspect.validation; +package com.linkedin.metadata.structuredproperties.validation; import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; import static com.linkedin.structured.PropertyCardinality.*; import com.google.common.collect.ImmutableSet; import com.linkedin.common.Status; import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.GetMode; import com.linkedin.entity.Aspect; -import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; @@ -25,12 +25,13 @@ import java.util.Collection; import java.util.Collections; import java.util.Map; -import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.Getter; import lombok.Setter; import lombok.experimental.Accessors; @@ -52,25 +53,7 @@ public class PropertyDefinitionValidator extends AspectPayloadValidator { protected Stream validateProposedAspects( @Nonnull Collection mcpItems, @Nonnull RetrieverContext retrieverContext) { - final String entityKeyAspect = 
- retrieverContext - .getAspectRetriever() - .getEntityRegistry() - .getEntitySpec(STRUCTURED_PROPERTY_ENTITY_NAME) - .getKeyAspectName(); - - return mcpItems.stream() - .filter(i -> ChangeType.DELETE.equals(i.getChangeType())) - .map( - i -> { - if (ImmutableSet.of(entityKeyAspect, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) - .contains(i.getAspectSpec().getName())) { - return AspectValidationException.forItem( - i, "Hard delete of Structured Property Definitions is not supported."); - } - return null; - }) - .filter(Objects::nonNull); + return Stream.empty(); } @Override @@ -78,10 +61,7 @@ protected Stream validatePreCommitAspects( @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { return validateDefinitionUpserts( changeMCPs.stream() - .filter( - i -> - ChangeType.UPSERT.equals(i.getChangeType()) - && STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME.equals(i.getAspectName())) + .filter(i -> STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME.equals(i.getAspectName())) .collect(Collectors.toList()), retrieverContext); } @@ -105,19 +85,24 @@ public static Stream validateDefinitionUpserts( "Cannot mutate a soft deleted Structured Property Definition") .ifPresent(exceptions::addException); + final StructuredPropertyDefinition newDefinition = + item.getAspect(StructuredPropertyDefinition.class); + + versionFormatCheck(item, newDefinition.getVersion()).ifPresent(exceptions::addException); + if (item.getPreviousSystemAspect() != null) { StructuredPropertyDefinition previousDefinition = item.getPreviousSystemAspect().getAspect(StructuredPropertyDefinition.class); - StructuredPropertyDefinition newDefinition = - item.getAspect(StructuredPropertyDefinition.class); - if (!newDefinition.getValueType().equals(previousDefinition.getValueType())) { + if (!newDefinition.getValueType().equals(previousDefinition.getValueType()) + && !allowBreakingWithVersion(previousDefinition, newDefinition, item, exceptions)) { exceptions.addException( item, "Value type cannot be 
changed as this is a backwards incompatible change"); } if (newDefinition.getCardinality().equals(SINGLE) - && previousDefinition.getCardinality().equals(MULTIPLE)) { + && previousDefinition.getCardinality().equals(MULTIPLE) + && !allowBreakingWithVersion(previousDefinition, newDefinition, item, exceptions)) { exceptions.addException( item, "Property definition cardinality cannot be changed from MULTI to SINGLE"); } @@ -127,10 +112,12 @@ public static Stream validateDefinitionUpserts( } // Assure new definition has only added allowed values, not removed them if (newDefinition.getAllowedValues() != null) { - if (!previousDefinition.hasAllowedValues() - || previousDefinition.getAllowedValues() == null) { + if ((!previousDefinition.hasAllowedValues() + || previousDefinition.getAllowedValues() == null) + && !allowBreakingWithVersion(previousDefinition, newDefinition, item, exceptions)) { exceptions.addException(item, "Cannot restrict values that were previously allowed"); - } else { + } else if (!allowBreakingWithVersion( + previousDefinition, newDefinition, item, exceptions)) { Set newAllowedValues = newDefinition.getAllowedValues().stream() .map(PropertyValue::getValue) @@ -163,4 +150,46 @@ static Optional softDeleteCheck } return Optional.empty(); } + + /** + * Allow new version if monotonically increasing + * + * @param oldDefinition previous version + * @param newDefinition next version + * @return whether version increase should allow breaking change + */ + private static boolean allowBreakingWithVersion( + @Nonnull StructuredPropertyDefinition oldDefinition, + @Nonnull StructuredPropertyDefinition newDefinition, + @Nonnull ChangeMCP item, + @Nonnull ValidationExceptionCollection exceptions) { + final String oldVersion = oldDefinition.getVersion(GetMode.NULL); + final String newVersion = newDefinition.getVersion(GetMode.NULL); + + if (newVersion != null && newVersion.contains(".")) { + exceptions.addException( + item, + String.format("Invalid version `%s` cannot 
contain the `.` character.", newVersion)); + } + + if (oldVersion == null && newVersion != null) { + return true; + } else if (newVersion != null) { + return newVersion.compareToIgnoreCase(oldVersion) > 0; + } + return false; + } + + private static Pattern VERSION_REGEX = Pattern.compile("[0-9]{14}"); + + private static Optional versionFormatCheck( + MCPItem item, @Nullable String version) { + if (version != null && !VERSION_REGEX.matcher(version).matches()) { + return Optional.of( + AspectValidationException.forItem( + item, + String.format("Invalid version specified. Must match %s", VERSION_REGEX.toString()))); + } + return Optional.empty(); + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/StructuredPropertiesValidator.java similarity index 94% rename from entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java rename to metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/StructuredPropertiesValidator.java index fcae6ca8cb71a..cdbe2eb95a15d 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/StructuredPropertiesValidator.java @@ -1,7 +1,9 @@ -package com.linkedin.metadata.aspect.validation; +package com.linkedin.metadata.structuredproperties.validation; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; -import static com.linkedin.metadata.aspect.validation.PropertyDefinitionValidator.softDeleteCheck; +import static com.linkedin.metadata.models.StructuredPropertyUtils.getLogicalValueType; +import static com.linkedin.metadata.models.StructuredPropertyUtils.getValueTypeId; +import static 
com.linkedin.metadata.structuredproperties.validation.PropertyDefinitionValidator.softDeleteCheck; import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; @@ -66,23 +68,6 @@ public class StructuredPropertiesValidator extends AspectPayloadValidator { LogicalValueType.DATE, LogicalValueType.URN)); - public static LogicalValueType getLogicalValueType(Urn valueType) { - String valueTypeId = getValueTypeId(valueType); - if (valueTypeId.equals("string")) { - return LogicalValueType.STRING; - } else if (valueTypeId.equals("date")) { - return LogicalValueType.DATE; - } else if (valueTypeId.equals("number")) { - return LogicalValueType.NUMBER; - } else if (valueTypeId.equals("urn")) { - return LogicalValueType.URN; - } else if (valueTypeId.equals("rich_text")) { - return LogicalValueType.RICH_TEXT; - } - - return LogicalValueType.UNKNOWN; - } - @Nonnull private AspectPluginConfig config; @Override @@ -132,7 +117,11 @@ public static Stream validateProposedUpserts( StructuredPropertyDefinition structuredPropertyDefinition = lookupPropertyDefinition(propertyUrn, allStructuredPropertiesAspects); if (structuredPropertyDefinition == null) { - exceptions.addException(i, "Unexpected null value found."); + exceptions.addException( + i, + String.format( + "Unexpected null value found for %s Structured Property Definition.", + propertyUrn)); } log.debug( @@ -237,7 +226,7 @@ private static Set validateStructuredPropertyUrns( for (BatchItem i : exceptions.successful(mcpItems)) { StructuredProperties structuredProperties = i.getAspect(StructuredProperties.class); - log.warn("Validator called with {}", structuredProperties); + log.info("Validator called with {}", structuredProperties); Map> structuredPropertiesMap = structuredProperties.getProperties().stream() .collect( @@ -412,14 +401,6 @@ private static Optional validateType( return Optional.empty(); } - private static String getValueTypeId(@Nonnull final Urn valueType) { - String valueTypeId = 
valueType.getId(); - if (valueTypeId.startsWith("datahub.")) { - valueTypeId = valueTypeId.split("\\.")[1]; - } - return valueTypeId; - } - private static Map> fetchPropertyAspects( @Nonnull Collection mcpItems, AspectRetriever aspectRetriever, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java index 36eab7b69e6a1..13fde9e392927 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java @@ -4,6 +4,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; +import com.linkedin.common.urn.Urn; import com.linkedin.metadata.run.AspectRowSummary; import com.linkedin.metadata.run.IngestionRunSummary; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; @@ -14,6 +15,7 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.mxe.SystemMetadata; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.security.MessageDigest; @@ -227,10 +229,10 @@ public List listRuns( } @Override - public void configure() { + public void reindexAll(Collection> properties) { log.info("Setting up system metadata index"); try { - for (ReindexConfig config : buildReindexConfigs()) { + for (ReindexConfig config : buildReindexConfigs(properties)) { _indexBuilder.buildIndex(config); } } catch (IOException ie) { @@ -239,7 +241,8 @@ public void configure() { } @Override - public List buildReindexConfigs() throws IOException { + public List buildReindexConfigs( + Collection> properties) throws 
IOException { return List.of( _indexBuilder.buildReindexState( _indexConvention.getIndexName(INDEX_NAME), @@ -247,17 +250,6 @@ public List buildReindexConfigs() throws IOException { Collections.emptyMap())); } - @Override - public List buildReindexConfigsWithAllStructProps( - Collection properties) throws IOException { - return buildReindexConfigs(); - } - - @Override - public void reindexAll() { - configure(); - } - @VisibleForTesting @Override public void clear() { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java index 240108b191083..ce4ff53eba91b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java @@ -202,19 +202,9 @@ private static Pair toEnvAspectGener } @Override - public void configure() { - indexBuilders.reindexAll(); - } - - @Override - public List buildReindexConfigs() { - return indexBuilders.buildReindexConfigs(); - } - - @Override - public List buildReindexConfigsWithAllStructProps( - Collection properties) throws IOException { - return indexBuilders.buildReindexConfigsWithAllStructProps(properties); + public List buildReindexConfigs( + Collection> properties) throws IOException { + return indexBuilders.buildReindexConfigs(properties); } public String reindexAsync( @@ -224,8 +214,8 @@ public String reindexAsync( } @Override - public void reindexAll() { - configure(); + public void reindexAll(Collection> properties) { + indexBuilders.reindexAll(properties); } @Override @@ -308,7 +298,7 @@ public long countByFilter( .getEntityRegistry() .getEntitySpec(entityName) .getSearchableFieldTypes(), - opContext.getRetrieverContext().get().getAspectRetriever())); + opContext.getAspectRetriever())); 
CountRequest countRequest = new CountRequest(); countRequest.query(filterQueryBuilder); countRequest.indices(indexName); @@ -338,10 +328,7 @@ public List getAspectValues( QueryBuilders.boolQuery() .must( ESUtils.buildFilterQuery( - filter, - true, - searchableFieldTypes, - opContext.getRetrieverContext().get().getAspectRetriever())); + filter, true, searchableFieldTypes, opContext.getAspectRetriever())); filterQueryBuilder.must(QueryBuilders.matchQuery("urn", urn.toString())); // NOTE: We are interested only in the un-exploded rows as only they carry the `event` payload. filterQueryBuilder.mustNot(QueryBuilders.termQuery(MappingsBuilder.IS_EXPLODED_FIELD, true)); @@ -352,7 +339,8 @@ public List getAspectValues( .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) .setValue(startTimeMillis.toString()); filterQueryBuilder.must( - ESUtils.getQueryBuilderFromCriterion(startTimeCriterion, true, searchableFieldTypes)); + ESUtils.getQueryBuilderFromCriterion( + startTimeCriterion, true, searchableFieldTypes, opContext.getAspectRetriever())); } if (endTimeMillis != null) { Criterion endTimeCriterion = @@ -361,7 +349,8 @@ public List getAspectValues( .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) .setValue(endTimeMillis.toString()); filterQueryBuilder.must( - ESUtils.getQueryBuilderFromCriterion(endTimeCriterion, true, searchableFieldTypes)); + ESUtils.getQueryBuilderFromCriterion( + endTimeCriterion, true, searchableFieldTypes, opContext.getAspectRetriever())); } final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(filterQueryBuilder); @@ -446,7 +435,7 @@ public DeleteAspectValuesResult deleteAspectValues( filter, true, opContext.getEntityRegistry().getEntitySpec(entityName).getSearchableFieldTypes(), - opContext.getRetrieverContext().get().getAspectRetriever()); + opContext.getAspectRetriever()); final Optional result = bulkProcessor @@ -482,7 +471,7 @@ public String deleteAspectValuesAsync( filter, true, 
opContext.getEntityRegistry().getEntitySpec(entityName).getSearchableFieldTypes(), - opContext.getRetrieverContext().get().getAspectRetriever()); + opContext.getAspectRetriever()); final int batchSize = options.getBatchSize() > 0 ? options.getBatchSize() : DEFAULT_LIMIT; TimeValue timeout = options.getTimeoutSeconds() > 0 @@ -516,7 +505,7 @@ public String reindexAsync( filter, true, opContext.getEntityRegistry().getEntitySpec(entityName).getSearchableFieldTypes(), - opContext.getRetrieverContext().get().getAspectRetriever()); + opContext.getAspectRetriever()); try { return this.reindexAsync(indexName, filterQueryBuilder, options); } catch (Exception e) { @@ -574,10 +563,7 @@ public TimeseriesScrollResult scrollAspects( QueryBuilders.boolQuery() .filter( ESUtils.buildFilterQuery( - filter, - true, - searchableFieldTypes, - opContext.getRetrieverContext().get().getAspectRetriever())); + filter, true, searchableFieldTypes, opContext.getAspectRetriever())); if (startTimeMillis != null) { Criterion startTimeCriterion = @@ -586,7 +572,8 @@ public TimeseriesScrollResult scrollAspects( .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) .setValue(startTimeMillis.toString()); filterQueryBuilder.filter( - ESUtils.getQueryBuilderFromCriterion(startTimeCriterion, true, searchableFieldTypes)); + ESUtils.getQueryBuilderFromCriterion( + startTimeCriterion, true, searchableFieldTypes, opContext.getAspectRetriever())); } if (endTimeMillis != null) { Criterion endTimeCriterion = @@ -595,7 +582,8 @@ public TimeseriesScrollResult scrollAspects( .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) .setValue(endTimeMillis.toString()); filterQueryBuilder.filter( - ESUtils.getQueryBuilderFromCriterion(endTimeCriterion, true, searchableFieldTypes)); + ESUtils.getQueryBuilderFromCriterion( + endTimeCriterion, true, searchableFieldTypes, opContext.getAspectRetriever())); } SearchResponse response = diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java index b59cd3a647d71..6b67789c3e2d8 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.timeseries.elastic.indexbuilder; +import com.linkedin.common.urn.Urn; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; @@ -29,8 +30,8 @@ public class TimeseriesAspectIndexBuilders implements ElasticSearchIndexed { @Nonnull private final IndexConvention indexConvention; @Override - public void reindexAll() { - for (ReindexConfig config : buildReindexConfigs()) { + public void reindexAll(Collection> properties) { + for (ReindexConfig config : buildReindexConfigs(properties)) { try { indexBuilder.buildIndex(config); } catch (IOException e) { @@ -69,7 +70,8 @@ public String reindexAsync( } @Override - public List buildReindexConfigs() { + public List buildReindexConfigs( + Collection> properties) { return entityRegistry.getEntitySpecs().values().stream() .flatMap( entitySpec -> @@ -94,10 +96,4 @@ public List buildReindexConfigs() { }) .collect(Collectors.toList()); } - - @Override - public List buildReindexConfigsWithAllStructProps( - Collection properties) throws IOException { - return buildReindexConfigs(); - } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java index 95665450a2572..1bf96841e5fe1 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java @@ -5,6 +5,7 @@ import com.linkedin.data.schema.DataSchema; import com.linkedin.data.template.StringArray; import com.linkedin.data.template.StringArrayArray; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.TimeseriesFieldCollectionSpec; @@ -374,12 +375,13 @@ public GenericTable getAggregatedStats( filter, true, opContext.getEntityRegistry().getEntitySpec(entityName).getSearchableFieldTypes(), - opContext.getRetrieverContext().get().getAspectRetriever()); + opContext.getAspectRetriever()); AspectSpec aspectSpec = getTimeseriesAspectSpec(opContext, entityName, aspectName); // Build and attach the grouping aggregations final Pair topAndBottomAggregations = - makeGroupingAggregationBuilder(aspectSpec, null, groupingBuckets); + makeGroupingAggregationBuilder( + aspectSpec, null, groupingBuckets, opContext.getAspectRetriever()); AggregationBuilder rootAggregationBuilder = topAndBottomAggregations.getFirst(); AggregationBuilder mostNested = topAndBottomAggregations.getSecond(); @@ -462,7 +464,8 @@ private void addAggregationBuildersFromAggregationSpec( private Pair makeGroupingAggregationBuilder( AspectSpec aspectSpec, @Nullable AggregationBuilder baseAggregationBuilder, - @Nullable GroupingBucket[] groupingBuckets) { + @Nullable GroupingBucket[] groupingBuckets, + @Nonnull AspectRetriever aspectRetriever) { AggregationBuilder firstAggregationBuilder = baseAggregationBuilder; AggregationBuilder lastAggregationBuilder = baseAggregationBuilder; @@ -481,7 +484,8 @@ private Pair makeGroupingAggregationBuil } else if (curGroupingBucket.getType() == GroupingBucketType.STRING_GROUPING_BUCKET) { // Process the string grouping bucket using 
the 'terms' aggregation. // The field can be Keyword, Numeric, ip, boolean, or binary. - String fieldName = ESUtils.toKeywordField(curGroupingBucket.getKey(), true); + String fieldName = + ESUtils.toKeywordField(curGroupingBucket.getKey(), true, aspectRetriever); DataSchema.Type fieldType = getGroupingBucketKeyType(aspectSpec, curGroupingBucket); curAggregationBuilder = AggregationBuilders.terms(getGroupingBucketAggName(curGroupingBucket)) diff --git a/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java b/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java index 1cffbb6e2cf21..12b12cf105196 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java @@ -46,7 +46,7 @@ public static Map ingestCorpUserKeyAspects( .recordTemplate(aspect) .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); } entityService.ingestAspects( opContext, @@ -83,7 +83,7 @@ public static Map ingestCorpUserInfoAspects( .recordTemplate(aspect) .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); } entityService.ingestAspects( opContext, @@ -121,7 +121,7 @@ public static Map ingestChartInfoAspects( .recordTemplate(aspect) .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); } entityService.ingestAspects( opContext, diff --git 
a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java index 23513738fbc33..ef6c9e56e132b 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java @@ -92,11 +92,11 @@ private void configureComponents() { .entityRegistry(_testEntityRegistry) .build()) .graphRetriever(TestOperationContexts.emptyGraphRetriever) + .searchRetriever(TestOperationContexts.emptySearchRetriever) .build(), null, opContext -> - ((EntityServiceAspectRetriever) - opContext.getRetrieverContext().get().getAspectRetriever()) + ((EntityServiceAspectRetriever) opContext.getAspectRetrieverOpt().get()) .setSystemOperationContext(opContext)); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java index 76c14be5f8c13..b9f5984e57667 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java @@ -92,11 +92,11 @@ public void setupTest() { .entityRegistry(_testEntityRegistry) .build()) .graphRetriever(TestOperationContexts.emptyGraphRetriever) + .searchRetriever(TestOperationContexts.emptySearchRetriever) .build(), null, opContext -> - ((EntityServiceAspectRetriever) - opContext.getRetrieverContext().get().getAspectRetriever()) + ((EntityServiceAspectRetriever) opContext.getAspectRetrieverOpt().get()) .setSystemOperationContext(opContext)); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index 9a8186cc838ab..91b01c55aac39 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -927,28 +927,28 @@ public void testRollbackAspect() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn2) .aspectName(aspectName) .recordTemplate(writeAspect2) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn3) .aspectName(aspectName) .recordTemplate(writeAspect3) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) .recordTemplate(writeAspect1Overwrite) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1019,21 +1019,21 @@ public void testRollbackKey() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(keyAspectName) .recordTemplate(writeKey1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) 
.recordTemplate(writeAspect1Overwrite) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1112,35 +1112,35 @@ public void testRollbackUrn() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(keyAspectName) .recordTemplate(writeKey1) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn2) .aspectName(aspectName) .recordTemplate(writeAspect2) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn3) .aspectName(aspectName) .recordTemplate(writeAspect3) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) .recordTemplate(writeAspect1Overwrite) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1188,7 +1188,7 @@ public void testIngestGetLatestAspect() throws AssertionError { .recordTemplate(writeAspect1) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); 
+ .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1244,7 +1244,7 @@ public void testIngestGetLatestAspect() throws AssertionError { .recordTemplate(writeAspect2) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata2) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1298,7 +1298,7 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { .recordTemplate(writeAspect1) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1325,7 +1325,7 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { .recordTemplate(writeAspect2) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1394,7 +1394,7 @@ public void testIngestSameAspect() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1450,7 +1450,7 @@ public void testIngestSameAspect() throws AssertionError { .recordTemplate(writeAspect2) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1511,42 +1511,42 @@ public void 
testRetention() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1a) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1b) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2a) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2b) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1587,14 +1587,14 @@ public void testRetention() throws AssertionError { .recordTemplate(writeAspect1c) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), 
ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2c) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphContainer.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphContainer.java index 481db53eafbbe..9c67c610196ed 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphContainer.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphContainer.java @@ -224,9 +224,6 @@ public Set getLivenessCheckPortNumbers() { return Stream.of(getHttpPort(), getGrpcPort()).map(this::getMappedPort).collect(toSet()); } - @Override - protected void configure() {} - public int getHttpPort() { return getMappedPort(HTTP_PORT); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java index d1ee1996e5b8a..b4ad5ce61d8f4 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java @@ -72,7 +72,7 @@ public abstract class SearchGraphServiceTestBase extends GraphServiceTestBase { @BeforeClass public void setup() { _client = buildService(_enableMultiPathSearch); - _client.configure(); + _client.reindexAll(Collections.emptySet()); } @BeforeMethod @@ -121,7 +121,7 @@ protected GraphService getGraphService(boolean enableMultiPathSearch) { if (enableMultiPathSearch != _enableMultiPathSearch) { _enableMultiPathSearch = enableMultiPathSearch; _client = buildService(enableMultiPathSearch); - _client.configure(); + 
_client.reindexAll(Collections.emptySet()); } return _client; } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java index 3f03c64c6f921..3dbbfb2cebc3f 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java @@ -126,7 +126,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { .asSession(RequestContext.TEST, Authorizer.EMPTY, TestOperationContexts.TEST_USER_AUTH); settingsBuilder = new SettingsBuilder(null); elasticSearchService = buildEntitySearchService(); - elasticSearchService.configure(); + elasticSearchService.reindexAll(Collections.emptySet()); cacheManager = new ConcurrentMapCacheManager(); graphService = mock(GraphService.class); resetService(true, false); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java index d5a21cfecee27..a610cf95f827a 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java @@ -39,6 +39,7 @@ import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; +import java.util.Collections; import javax.annotation.Nonnull; import org.opensearch.client.RestHighLevelClient; import org.springframework.cache.CacheManager; @@ -83,7 +84,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { settingsBuilder = new SettingsBuilder(null); elasticSearchService = buildEntitySearchService(); - elasticSearchService.configure(); + elasticSearchService.reindexAll(Collections.emptySet()); cacheManager = new 
ConcurrentMapCacheManager(); resetSearchService(); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java index c3dcf3aaee9b7..58574025aeeac 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java @@ -25,6 +25,7 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.Collections; import java.util.List; import javax.annotation.Nonnull; import org.opensearch.client.RestHighLevelClient; @@ -64,7 +65,7 @@ public void setup() { new SnapshotEntityRegistry(new Snapshot()), new IndexConventionImpl("es_service_test")); settingsBuilder = new SettingsBuilder(null); elasticSearchService = buildService(); - elasticSearchService.configure(); + elasticSearchService.reindexAll(Collections.emptySet()); } @BeforeMethod diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/IndexBuilderTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/IndexBuilderTestBase.java index 0858c3dd7eb99..92ca4c5ed8a05 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/IndexBuilderTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/IndexBuilderTestBase.java @@ -57,6 +57,7 @@ public void setup() { Map.of(), false, false, + false, new ElasticSearchConfiguration(), gitVersion); } @@ -103,6 +104,7 @@ public void testESIndexBuilderCreation() throws Exception { Map.of(), false, false, + false, new ElasticSearchConfiguration(), gitVersion); customIndexBuilder.buildIndex(TEST_INDEX_NAME, Map.of(), Map.of()); @@ -126,6 +128,7 @@ public void testMappingReindex() throws Exception { Map.of(), 
false, true, + false, new ElasticSearchConfiguration(), gitVersion); @@ -197,6 +200,7 @@ public void testSettingsNumberOfShardsReindex() throws Exception { Map.of(), true, false, + false, new ElasticSearchConfiguration(), gitVersion); @@ -238,6 +242,7 @@ public void testSettingsNoReindex() throws Exception { Map.of(), true, false, + false, new ElasticSearchConfiguration(), gitVersion), new ESIndexBuilder( @@ -249,6 +254,7 @@ public void testSettingsNoReindex() throws Exception { Map.of(), true, false, + false, new ElasticSearchConfiguration(), gitVersion), new ESIndexBuilder( @@ -260,6 +266,7 @@ public void testSettingsNoReindex() throws Exception { Map.of(), false, false, + false, new ElasticSearchConfiguration(), gitVersion), new ESIndexBuilder( @@ -271,6 +278,7 @@ public void testSettingsNoReindex() throws Exception { Map.of(), false, false, + false, new ElasticSearchConfiguration(), gitVersion)); @@ -314,6 +322,7 @@ public void testCopyStructuredPropertyMappings() throws Exception { Map.of(), false, true, + false, new ElasticSearchConfiguration(), gitVersion); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java index 9185e2e7ee072..75da2bc62aaad 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java @@ -7,6 +7,8 @@ import com.google.common.collect.ImmutableMap; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.SetMode; import com.linkedin.metadata.TestEntitySpecBuilder; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.EntitySpecBuilder; @@ -15,6 +17,7 @@ import com.linkedin.metadata.search.elasticsearch.indexbuilder.MappingsBuilder; 
import com.linkedin.metadata.search.elasticsearch.query.request.TestSearchFieldConfig; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.io.Serializable; import java.net.URISyntaxException; import java.util.List; @@ -178,19 +181,25 @@ public void testGetMappingsWithStructuredProperty() throws URISyntaxException { // Test that a structured property that does not apply to the entity does not alter the mappings StructuredPropertyDefinition structPropNotForThisEntity = new StructuredPropertyDefinition() + .setVersion(null, SetMode.REMOVE_IF_NULL) .setQualifiedName("propNotForThis") .setDisplayName("propNotForThis") .setEntityTypes(new UrnArray(Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "dataset"))) .setValueType(Urn.createFromString("urn:li:logicalType:STRING")); Map resultWithOnlyUnrelatedStructuredProp = MappingsBuilder.getMappings( - TestEntitySpecBuilder.getSpec(), List.of(structPropNotForThisEntity)); + TestEntitySpecBuilder.getSpec(), + List.of( + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:propNotForThis"), + structPropNotForThisEntity))); assertEquals(resultWithOnlyUnrelatedStructuredProp, resultWithoutStructuredProps); // Test that a structured property that does apply to this entity is included in the mappings String fqnOfRelatedProp = "propForThis"; StructuredPropertyDefinition structPropForThisEntity = new StructuredPropertyDefinition() + .setVersion(null, SetMode.REMOVE_IF_NULL) .setQualifiedName(fqnOfRelatedProp) .setDisplayName("propForThis") .setEntityTypes( @@ -200,7 +209,11 @@ public void testGetMappingsWithStructuredProperty() throws URISyntaxException { .setValueType(Urn.createFromString("urn:li:logicalType:STRING")); Map resultWithOnlyRelatedStructuredProp = MappingsBuilder.getMappings( - TestEntitySpecBuilder.getSpec(), List.of(structPropForThisEntity)); + TestEntitySpecBuilder.getSpec(), + List.of( + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:propForThis"), + 
structPropForThisEntity))); assertNotEquals(resultWithOnlyRelatedStructuredProp, resultWithoutStructuredProps); Map fieldsBefore = (Map) resultWithoutStructuredProps.get("properties"); @@ -231,7 +244,95 @@ public void testGetMappingsWithStructuredProperty() throws URISyntaxException { Map resultWithBothStructuredProps = MappingsBuilder.getMappings( TestEntitySpecBuilder.getSpec(), - List.of(structPropForThisEntity, structPropNotForThisEntity)); + List.of( + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:propForThis"), + structPropForThisEntity), + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:propNotForThis"), + structPropNotForThisEntity))); + assertEquals(resultWithBothStructuredProps, resultWithOnlyRelatedStructuredProp); + } + + @Test + public void testGetMappingsWithStructuredPropertyV1() throws URISyntaxException { + // Baseline comparison: Mappings with no structured props + Map resultWithoutStructuredProps = + MappingsBuilder.getMappings(TestEntitySpecBuilder.getSpec()); + + // Test that a structured property that does not apply to the entity does not alter the mappings + StructuredPropertyDefinition structPropNotForThisEntity = + new StructuredPropertyDefinition() + .setVersion("00000000000001") + .setQualifiedName("propNotForThis") + .setDisplayName("propNotForThis") + .setEntityTypes(new UrnArray(Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "dataset"))) + .setValueType(Urn.createFromString("urn:li:logicalType:STRING")); + Map resultWithOnlyUnrelatedStructuredProp = + MappingsBuilder.getMappings( + TestEntitySpecBuilder.getSpec(), + List.of( + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:propNotForThis"), + structPropNotForThisEntity))); + assertEquals(resultWithOnlyUnrelatedStructuredProp, resultWithoutStructuredProps); + + // Test that a structured property that does apply to this entity is included in the mappings + String fqnOfRelatedProp = "propForThis"; + StructuredPropertyDefinition structPropForThisEntity = + new 
StructuredPropertyDefinition() + .setVersion("00000000000001") + .setQualifiedName(fqnOfRelatedProp) + .setDisplayName("propForThis") + .setEntityTypes( + new UrnArray( + Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "dataset"), + Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "testEntity"))) + .setValueType(Urn.createFromString("urn:li:logicalType:STRING")); + Map resultWithOnlyRelatedStructuredProp = + MappingsBuilder.getMappings( + TestEntitySpecBuilder.getSpec(), + List.of( + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:propForThis"), + structPropForThisEntity))); + assertNotEquals(resultWithOnlyRelatedStructuredProp, resultWithoutStructuredProps); + Map fieldsBefore = + (Map) resultWithoutStructuredProps.get("properties"); + Map fieldsAfter = + (Map) resultWithOnlyRelatedStructuredProp.get("properties"); + assertEquals(fieldsAfter.size(), fieldsBefore.size() + 1); + + Map structProps = (Map) fieldsAfter.get("structuredProperties"); + fieldsAfter = (Map) structProps.get("properties"); + + String newField = + fieldsAfter.keySet().stream() + .filter(field -> !fieldsBefore.containsKey(field)) + .findFirst() + .get(); + assertEquals(newField, "_versioned." 
+ fqnOfRelatedProp + ".00000000000001.string"); + assertEquals( + fieldsAfter.get(newField), + Map.of( + "normalizer", + "keyword_normalizer", + "type", + "keyword", + "fields", + Map.of("keyword", Map.of("type", "keyword")))); + + // Test that only structured properties that apply are included + Map resultWithBothStructuredProps = + MappingsBuilder.getMappings( + TestEntitySpecBuilder.getSpec(), + List.of( + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:propForThis"), + structPropForThisEntity), + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:propNotForThis"), + structPropNotForThisEntity))); assertEquals(resultWithBothStructuredProps, resultWithOnlyRelatedStructuredProp); } @@ -239,6 +340,7 @@ public void testGetMappingsWithStructuredProperty() throws URISyntaxException { public void testGetMappingsForStructuredProperty() throws URISyntaxException { StructuredPropertyDefinition testStructProp = new StructuredPropertyDefinition() + .setVersion(null, SetMode.REMOVE_IF_NULL) .setQualifiedName("testProp") .setDisplayName("exampleProp") .setEntityTypes( @@ -247,10 +349,14 @@ public void testGetMappingsForStructuredProperty() throws URISyntaxException { Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "testEntity"))) .setValueType(Urn.createFromString("urn:li:logicalType:STRING")); Map structuredPropertyFieldMappings = - MappingsBuilder.getMappingsForStructuredProperty(List.of(testStructProp)); + MappingsBuilder.getMappingsForStructuredProperty( + List.of( + (Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:testProp"), testStructProp)))); assertEquals(structuredPropertyFieldMappings.size(), 1); String keyInMap = structuredPropertyFieldMappings.keySet().stream().findFirst().get(); assertEquals(keyInMap, "testProp"); + Object mappings = structuredPropertyFieldMappings.get(keyInMap); assertEquals( mappings, @@ -264,6 +370,7 @@ public void testGetMappingsForStructuredProperty() throws URISyntaxException { StructuredPropertyDefinition propWithNumericType 
= new StructuredPropertyDefinition() + .setVersion(null, SetMode.REMOVE_IF_NULL) .setQualifiedName("testPropNumber") .setDisplayName("examplePropNumber") .setEntityTypes( @@ -272,7 +379,11 @@ public void testGetMappingsForStructuredProperty() throws URISyntaxException { Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "testEntity"))) .setValueType(Urn.createFromString("urn:li:logicalType:NUMBER")); Map structuredPropertyFieldMappingsNumber = - MappingsBuilder.getMappingsForStructuredProperty(List.of(propWithNumericType)); + MappingsBuilder.getMappingsForStructuredProperty( + List.of( + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:testPropNumber"), + propWithNumericType))); assertEquals(structuredPropertyFieldMappingsNumber.size(), 1); keyInMap = structuredPropertyFieldMappingsNumber.keySet().stream().findFirst().get(); assertEquals("testPropNumber", keyInMap); @@ -280,6 +391,61 @@ public void testGetMappingsForStructuredProperty() throws URISyntaxException { assertEquals(Map.of("type", "double"), mappings); } + @Test + public void testGetMappingsForStructuredPropertyV1() throws URISyntaxException { + StructuredPropertyDefinition testStructProp = + new StructuredPropertyDefinition() + .setVersion("00000000000001") + .setQualifiedName("testProp") + .setDisplayName("exampleProp") + .setEntityTypes( + new UrnArray( + Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "dataset"), + Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "testEntity"))) + .setValueType(Urn.createFromString("urn:li:logicalType:STRING")); + Map structuredPropertyFieldMappings = + MappingsBuilder.getMappingsForStructuredProperty( + List.of( + (Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:testProp"), testStructProp)))); + assertEquals(structuredPropertyFieldMappings.size(), 1); + String keyInMap = structuredPropertyFieldMappings.keySet().stream().findFirst().get(); + assertEquals(keyInMap, "_versioned.testProp.00000000000001.string"); + + Object mappings = 
structuredPropertyFieldMappings.get(keyInMap); + assertEquals( + mappings, + Map.of( + "type", + "keyword", + "normalizer", + "keyword_normalizer", + "fields", + Map.of("keyword", Map.of("type", "keyword")))); + + StructuredPropertyDefinition propWithNumericType = + new StructuredPropertyDefinition() + .setVersion("00000000000001") + .setQualifiedName("testPropNumber") + .setDisplayName("examplePropNumber") + .setEntityTypes( + new UrnArray( + Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "dataset"), + Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "testEntity"))) + .setValueType(Urn.createFromString("urn:li:logicalType:NUMBER")); + Map structuredPropertyFieldMappingsNumber = + MappingsBuilder.getMappingsForStructuredProperty( + List.of( + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:testPropNumber"), + propWithNumericType))); + assertEquals(structuredPropertyFieldMappingsNumber.size(), 1); + keyInMap = structuredPropertyFieldMappingsNumber.keySet().stream().findFirst().get(); + assertEquals(keyInMap, "_versioned.testPropNumber.00000000000001.number"); + mappings = structuredPropertyFieldMappingsNumber.get(keyInMap); + assertEquals(Map.of("type", "double"), mappings); + } + @Test public void testRefMappingsBuilder() { EntityRegistry entityRegistry = getTestEntityRegistry(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java index 43ae6bd7a48b7..0ea2340ae8217 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java @@ -1,21 +1,31 @@ package com.linkedin.metadata.search.query.request; +import static com.linkedin.metadata.Constants.DATA_TYPE_URN_PREFIX; +import static 
com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; import static com.linkedin.metadata.utils.SearchUtil.*; +import static org.mockito.ArgumentMatchers.anySet; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.SetMode; +import com.linkedin.entity.Aspect; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation; import com.linkedin.metadata.search.elasticsearch.query.request.AggregationQueryBuilder; import com.linkedin.r2.RemoteInvocationException; +import com.linkedin.structured.StructuredPropertyDefinition; import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -28,10 +38,73 @@ public class AggregationQueryBuilderTest { private static AspectRetriever aspectRetriever; + private static AspectRetriever aspectRetrieverV1; @BeforeClass public static void setup() throws RemoteInvocationException, URISyntaxException { - aspectRetriever = TestOperationContexts.emptyAspectRetriever(null); + Urn helloUrn = Urn.createFromString("urn:li:structuredProperty:hello"); + Urn abFghTenUrn = Urn.createFromString("urn:li:structuredProperty:ab.fgh.ten"); + + // legacy + aspectRetriever = mock(AspectRetriever.class); + when(aspectRetriever.getEntityRegistry()) + .thenReturn(TestOperationContexts.defaultEntityRegistry()); + + StructuredPropertyDefinition structPropHelloDefinition = new 
StructuredPropertyDefinition(); + structPropHelloDefinition.setVersion(null, SetMode.REMOVE_IF_NULL); + structPropHelloDefinition.setValueType(Urn.createFromString(DATA_TYPE_URN_PREFIX + "string")); + structPropHelloDefinition.setQualifiedName("hello"); + when(aspectRetriever.getLatestAspectObjects(eq(Set.of(helloUrn)), anySet())) + .thenReturn( + Map.of( + helloUrn, + Map.of( + STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, + new Aspect(structPropHelloDefinition.data())))); + + StructuredPropertyDefinition structPropAbFghTenDefinition = new StructuredPropertyDefinition(); + structPropAbFghTenDefinition.setVersion(null, SetMode.REMOVE_IF_NULL); + structPropAbFghTenDefinition.setValueType( + Urn.createFromString(DATA_TYPE_URN_PREFIX + "string")); + structPropAbFghTenDefinition.setQualifiedName("ab.fgh.ten"); + when(aspectRetriever.getLatestAspectObjects(eq(Set.of(abFghTenUrn)), anySet())) + .thenReturn( + Map.of( + abFghTenUrn, + Map.of( + STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, + new Aspect(structPropAbFghTenDefinition.data())))); + + // V1 + aspectRetrieverV1 = mock(AspectRetriever.class); + when(aspectRetrieverV1.getEntityRegistry()) + .thenReturn(TestOperationContexts.defaultEntityRegistry()); + + StructuredPropertyDefinition structPropHelloDefinitionV1 = new StructuredPropertyDefinition(); + structPropHelloDefinitionV1.setVersion("00000000000001"); + structPropHelloDefinitionV1.setValueType(Urn.createFromString(DATA_TYPE_URN_PREFIX + "string")); + structPropHelloDefinitionV1.setQualifiedName("hello"); + when(aspectRetrieverV1.getLatestAspectObjects(eq(Set.of(helloUrn)), anySet())) + .thenReturn( + Map.of( + helloUrn, + Map.of( + STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, + new Aspect(structPropHelloDefinitionV1.data())))); + + StructuredPropertyDefinition structPropAbFghTenDefinitionV1 = + new StructuredPropertyDefinition(); + structPropAbFghTenDefinitionV1.setVersion("00000000000001"); + structPropAbFghTenDefinitionV1.setValueType( + 
Urn.createFromString(DATA_TYPE_URN_PREFIX + "string")); + structPropAbFghTenDefinitionV1.setQualifiedName("ab.fgh.ten"); + when(aspectRetrieverV1.getLatestAspectObjects(eq(Set.of(abFghTenUrn)), anySet())) + .thenReturn( + Map.of( + abFghTenUrn, + Map.of( + STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, + new Aspect(structPropAbFghTenDefinitionV1.data())))); } @Test @@ -171,26 +244,69 @@ public void testAggregateOverStructuredProperty() { List aggs = builder.getAggregations( - TestOperationContexts.systemContextNoSearchAuthorization(), + TestOperationContexts.systemContextNoSearchAuthorization(aspectRetriever), List.of("structuredProperties.ab.fgh.ten")); Assert.assertEquals(aggs.size(), 1); AggregationBuilder aggBuilder = aggs.get(0); Assert.assertTrue(aggBuilder instanceof TermsAggregationBuilder); TermsAggregationBuilder agg = (TermsAggregationBuilder) aggBuilder; // Check that field name is sanitized to correct field name - Assert.assertEquals(agg.field(), "structuredProperties.ab_fgh_ten"); + Assert.assertEquals( + agg.field(), + "structuredProperties.ab_fgh_ten.keyword", + "Terms aggregate must be on a keyword or subfield keyword"); // Two structured properties aggs = builder.getAggregations( - TestOperationContexts.systemContextNoSearchAuthorization(), + TestOperationContexts.systemContextNoSearchAuthorization(aspectRetriever), List.of("structuredProperties.ab.fgh.ten", "structuredProperties.hello")); Assert.assertEquals(aggs.size(), 2); Assert.assertEquals( aggs.stream() .map(aggr -> ((TermsAggregationBuilder) aggr).field()) .collect(Collectors.toSet()), - Set.of("structuredProperties.ab_fgh_ten", "structuredProperties.hello")); + Set.of("structuredProperties.ab_fgh_ten.keyword", "structuredProperties.hello.keyword")); + } + + @Test + public void testAggregateOverStructuredPropertyV1() { + SearchConfiguration config = new SearchConfiguration(); + config.setMaxTermBucketSize(25); + + AggregationQueryBuilder builder = + new AggregationQueryBuilder( + config, 
ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of())); + + List aggs = + builder.getAggregations( + TestOperationContexts.systemContextNoSearchAuthorization(aspectRetrieverV1), + List.of("structuredProperties.ab.fgh.ten")); + Assert.assertEquals(aggs.size(), 1); + AggregationBuilder aggBuilder = aggs.get(0); + Assert.assertTrue(aggBuilder instanceof TermsAggregationBuilder); + TermsAggregationBuilder agg = (TermsAggregationBuilder) aggBuilder; + // Check that field name is sanitized to correct field name + Assert.assertEquals( + agg.field(), + "structuredProperties._versioned.ab_fgh_ten.00000000000001.string.keyword", + "Terms aggregation must be on a keyword field or subfield."); + + // Two structured properties + aggs = + builder.getAggregations( + TestOperationContexts.systemContextNoSearchAuthorization(aspectRetrieverV1), + List.of( + "structuredProperties.ab.fgh.ten", + "structuredProperties._versioned.hello.00000000000001.string")); + Assert.assertEquals(aggs.size(), 2); + Assert.assertEquals( + aggs.stream() + .map(aggr -> ((TermsAggregationBuilder) aggr).field()) + .collect(Collectors.toSet()), + Set.of( + "structuredProperties._versioned.ab_fgh_ten.00000000000001.string.keyword", + "structuredProperties._versioned.hello.00000000000001.string.keyword")); } @Test @@ -240,7 +356,76 @@ public void testAggregateOverFieldsAndStructProp() { // Aggregate over fields and structured properties List aggs = builder.getAggregations( - TestOperationContexts.systemContextNoSearchAuthorization(), + TestOperationContexts.systemContextNoSearchAuthorization(aspectRetriever), + ImmutableList.of( + "test1", + "test2", + "hasTest1", + "structuredProperties.ab.fgh.ten", + "structuredProperties.hello")); + Assert.assertEquals(aggs.size(), 5); + Set facets = + aggs.stream() + .map(aggB -> ((TermsAggregationBuilder) aggB).field()) + .collect(Collectors.toSet()); + Assert.assertEquals( + facets, + ImmutableSet.of( + "test1.keyword", + "test2.keyword", + "hasTest1", + 
"structuredProperties.ab_fgh_ten.keyword", + "structuredProperties.hello.keyword")); + } + + @Test + public void testAggregateOverFieldsAndStructPropV1() { + SearchableAnnotation annotation1 = + new SearchableAnnotation( + "test1", + SearchableAnnotation.FieldType.KEYWORD, + true, + true, + false, + false, + Optional.empty(), + Optional.of("Has Test"), + 1.0, + Optional.of("hasTest1"), + Optional.empty(), + Collections.emptyMap(), + Collections.emptyList(), + false); + + SearchableAnnotation annotation2 = + new SearchableAnnotation( + "test2", + SearchableAnnotation.FieldType.KEYWORD, + true, + true, + false, + false, + Optional.of("Test Filter"), + Optional.empty(), + 1.0, + Optional.empty(), + Optional.empty(), + Collections.emptyMap(), + Collections.emptyList(), + false); + + SearchConfiguration config = new SearchConfiguration(); + config.setMaxTermBucketSize(25); + + AggregationQueryBuilder builder = + new AggregationQueryBuilder( + config, + ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation1, annotation2))); + + // Aggregate over fields and structured properties + List aggs = + builder.getAggregations( + TestOperationContexts.systemContextNoSearchAuthorization(aspectRetrieverV1), ImmutableList.of( "test1", "test2", @@ -258,8 +443,8 @@ public void testAggregateOverFieldsAndStructProp() { "test1.keyword", "test2.keyword", "hasTest1", - "structuredProperties.ab_fgh_ten", - "structuredProperties.hello")); + "structuredProperties._versioned.ab_fgh_ten.00000000000001.string.keyword", + "structuredProperties._versioned.hello.00000000000001.string.keyword")); } @Test diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java index 2f68f17dae241..9376552f7abc5 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java @@ -5,7 +5,6 @@ import static org.testng.Assert.assertTrue; import com.linkedin.metadata.TestEntitySpecBuilder; -import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.custom.AutocompleteConfiguration; import com.linkedin.metadata.config.search.custom.BoolQueryConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; @@ -34,9 +33,7 @@ public class AutocompleteRequestHandlerTest { private AutocompleteRequestHandler handler = AutocompleteRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), - CustomSearchConfiguration.builder().build(), - TestOperationContexts.emptyAspectRetriever(null)); + TestEntitySpecBuilder.getSpec(), CustomSearchConfiguration.builder().build()); private OperationContext mockOpContext = TestOperationContexts.systemContextNoSearchAuthorization(mock(EntityRegistry.class)); @@ -173,8 +170,7 @@ public void testCustomConfigWithDefault() { .should(List.of(Map.of("match_all", Map.of()))) .build()) .build())) - .build(), - mock(AspectRetriever.class)); + .build()); SearchRequest autocompleteRequest = withoutDefaultQuery.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -199,8 +195,7 @@ public void testCustomConfigWithDefault() { .should(List.of(Map.of("match_all", Map.of()))) .build()) .build())) - .build(), - mock(AspectRetriever.class)); + .build()); autocompleteRequest = withDefaultQuery.getSearchRequest(mockOpContext, "input", null, null, 10); sourceBuilder = autocompleteRequest.source(); @@ -242,8 +237,7 @@ public void testCustomConfigWithInheritedQueryFunctionScores() { .should(List.of(Map.of("match_all", Map.of()))) .build()) .build())) - .build(), - mock(AspectRetriever.class)); + .build()); SearchRequest autocompleteRequest = withInherit.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -281,8 +275,7 @@ public void 
testCustomConfigWithInheritedQueryFunctionScores() { .should(List.of(Map.of("match_all", Map.of()))) .build()) .build())) - .build(), - mock(AspectRetriever.class)); + .build()); autocompleteRequest = noQueryCustomization.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -344,8 +337,7 @@ public void testCustomConfigWithFunctionScores() { Map.of( "deprecated", Map.of("value", false))))))) .build())) - .build(), - mock(AspectRetriever.class)); + .build()); SearchRequest autocompleteRequest = explicitNoInherit.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -397,8 +389,7 @@ public void testCustomConfigWithFunctionScores() { Map.of( "deprecated", Map.of("value", false))))))) .build())) - .build(), - mock(AspectRetriever.class)); + .build()); autocompleteRequest = explicit.getSearchRequest(mockOpContext, "input", null, null, 10); sourceBuilder = autocompleteRequest.source(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index 1b41ff44bc969..1cd9a274463d3 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -86,11 +86,7 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests { public void testDatasetFieldsAndHighlights() { EntitySpec entitySpec = operationContext.getEntityRegistry().getEntitySpec("dataset"); SearchRequestHandler datasetHandler = - SearchRequestHandler.getBuilder( - entitySpec, - testQueryConfig, - null, - operationContext.getRetrieverContext().get().getAspectRetriever()); + SearchRequestHandler.getBuilder(entitySpec, testQueryConfig, null); /* Ensure efficient query performance, we do not expect upstream/downstream/fineGrained lineage @@ -109,11 +105,7 @@ public void 
testDatasetFieldsAndHighlights() { @Test public void testSearchRequestHandlerHighlightingTurnedOff() { SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), - testQueryConfig, - null, - operationContext.getRetrieverContext().get().getAspectRetriever()); + SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); SearchRequest searchRequest = requestHandler.getSearchRequest( operationContext.withSearchFlags( @@ -153,11 +145,7 @@ public void testSearchRequestHandlerHighlightingTurnedOff() { @Test public void testSearchRequestHandler() { SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), - testQueryConfig, - null, - operationContext.getRetrieverContext().get().getAspectRetriever()); + SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); SearchRequest searchRequest = requestHandler.getSearchRequest( operationContext.withSearchFlags( @@ -220,11 +208,7 @@ public void testSearchRequestHandler() { @Test public void testAggregationsInSearch() { SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), - testQueryConfig, - null, - operationContext.getRetrieverContext().get().getAspectRetriever()); + SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); final String nestedAggString = String.format("_entityType%stextFieldOverride", AGGREGATION_SEPARATOR_CHAR); SearchRequest searchRequest = @@ -292,11 +276,7 @@ public void testAggregationsInSearch() { public void testFilteredSearch() { final SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), - testQueryConfig, - null, - operationContext.getRetrieverContext().get().getAspectRetriever()); + SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); final BoolQueryBuilder testQuery = 
constructFilterQuery(requestHandler, false); @@ -675,11 +655,7 @@ private BoolQueryBuilder getQuery(final Criterion filterCriterion) { .setAnd(new CriterionArray(ImmutableList.of(filterCriterion))))); final SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), - testQueryConfig, - null, - operationContext.getRetrieverContext().get().getAspectRetriever()); + SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); return (BoolQueryBuilder) requestHandler diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java index 9953e08efb2d2..def14f9be7054 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java @@ -21,6 +21,7 @@ import com.linkedin.metadata.TestEntityUtil; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.entity.SearchRetriever; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.SearchableRefFieldSpec; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; @@ -186,6 +187,7 @@ public void testSetSearchableRefValue() throws URISyntaxException, RemoteInvocat RetrieverContext.builder() .aspectRetriever(aspectRetriever) .graphRetriever(mock(GraphRetriever.class)) + .searchRetriever(mock(SearchRetriever.class)) .build()); searchDocumentTransformer.setSearchableRefValue( @@ -241,6 +243,7 @@ public void testSetSearchableRefValue_RuntimeException() RetrieverContext.builder() .aspectRetriever(aspectRetriever) .graphRetriever(mock(GraphRetriever.class)) + .searchRetriever(mock(SearchRetriever.class)) .build()); ObjectNode searchDocument 
= JsonNodeFactory.instance.objectNode(); @@ -277,6 +280,7 @@ public void testSetSearchableRefValue_RuntimeException_URNExist() RetrieverContext.builder() .aspectRetriever(aspectRetriever) .graphRetriever(mock(GraphRetriever.class)) + .searchRetriever(mock(SearchRetriever.class)) .build()); ObjectNode searchDocument = JsonNodeFactory.instance.objectNode(); @@ -309,6 +313,7 @@ void testSetSearchableRefValue_WithInvalidURN() RetrieverContext.builder() .aspectRetriever(aspectRetriever) .graphRetriever(mock(GraphRetriever.class)) + .searchRetriever(mock(SearchRetriever.class)) .build()); ObjectNode searchDocument = JsonNodeFactory.instance.objectNode(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java index 838df98fdce9c..d56d9b0674884 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java @@ -1,18 +1,81 @@ package com.linkedin.metadata.search.utils; +import static com.linkedin.metadata.Constants.DATA_TYPE_URN_PREFIX; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; +import static org.mockito.ArgumentMatchers.anySet; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + import com.google.common.collect.ImmutableList; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.SetMode; import com.linkedin.data.template.StringArray; +import com.linkedin.entity.Aspect; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.r2.RemoteInvocationException; +import com.linkedin.structured.StructuredPropertyDefinition; +import io.datahubproject.test.metadata.context.TestOperationContexts; 
+import java.net.URISyntaxException; import java.util.HashMap; +import java.util.Map; +import java.util.Set; import org.opensearch.index.query.QueryBuilder; import org.testng.Assert; +import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; public class ESUtilsTest { private static final String FIELD_TO_EXPAND = "fieldTags"; + private static AspectRetriever aspectRetriever; + private static AspectRetriever aspectRetrieverV1; + + @BeforeClass + public static void setup() throws RemoteInvocationException, URISyntaxException { + Urn abFghTenUrn = Urn.createFromString("urn:li:structuredProperty:ab.fgh.ten"); + + // legacy + aspectRetriever = mock(AspectRetriever.class); + when(aspectRetriever.getEntityRegistry()) + .thenReturn(TestOperationContexts.defaultEntityRegistry()); + + StructuredPropertyDefinition structPropAbFghTenDefinition = new StructuredPropertyDefinition(); + structPropAbFghTenDefinition.setVersion(null, SetMode.REMOVE_IF_NULL); + structPropAbFghTenDefinition.setValueType( + Urn.createFromString(DATA_TYPE_URN_PREFIX + "string")); + structPropAbFghTenDefinition.setQualifiedName("ab.fgh.ten"); + when(aspectRetriever.getLatestAspectObjects(eq(Set.of(abFghTenUrn)), anySet())) + .thenReturn( + Map.of( + abFghTenUrn, + Map.of( + STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, + new Aspect(structPropAbFghTenDefinition.data())))); + + // V1 + aspectRetrieverV1 = mock(AspectRetriever.class); + when(aspectRetrieverV1.getEntityRegistry()) + .thenReturn(TestOperationContexts.defaultEntityRegistry()); + + StructuredPropertyDefinition structPropAbFghTenDefinitionV1 = + new StructuredPropertyDefinition(); + structPropAbFghTenDefinitionV1.setVersion("00000000000001"); + structPropAbFghTenDefinitionV1.setValueType( + Urn.createFromString(DATA_TYPE_URN_PREFIX + "string")); + structPropAbFghTenDefinitionV1.setQualifiedName("ab.fgh.ten"); + when(aspectRetrieverV1.getLatestAspectObjects(eq(Set.of(abFghTenUrn)), anySet())) + .thenReturn( + Map.of( + 
abFghTenUrn, + Map.of( + STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, + new Aspect(structPropAbFghTenDefinitionV1.data())))); + } + @Test public void testGetQueryBuilderFromCriterionEqualsValues() { @@ -23,7 +86,8 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { .setValues(new StringArray(ImmutableList.of("value1"))); QueryBuilder result = - ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); String expected = "{\n" + " \"terms\" : {\n" @@ -42,7 +106,9 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { .setCondition(Condition.EQUAL) .setValues(new StringArray(ImmutableList.of("value1", "value2"))); - result = ESUtils.getQueryBuilderFromCriterion(multiValueCriterion, false, new HashMap<>()); + result = + ESUtils.getQueryBuilderFromCriterion( + multiValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); expected = "{\n" + " \"terms\" : {\n" @@ -62,7 +128,9 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { .setCondition(Condition.EQUAL) .setValues(new StringArray(ImmutableList.of("value1", "value2"))); - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); + result = + ESUtils.getQueryBuilderFromCriterion( + timeseriesField, true, new HashMap<>(), mock(AspectRetriever.class)); expected = "{\n" + " \"terms\" : {\n" @@ -83,7 +151,8 @@ public void testGetQueryBuilderFromCriterionExists() { new Criterion().setField("myTestField").setCondition(Condition.EXISTS); QueryBuilder result = - ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); String expected = "{\n" + " \"bool\" : {\n" @@ -106,7 +175,9 @@ public void testGetQueryBuilderFromCriterionExists() { final Criterion timeseriesField = new 
Criterion().setField("myTestField").setCondition(Condition.EXISTS); - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); + result = + ESUtils.getQueryBuilderFromCriterion( + timeseriesField, true, new HashMap<>(), mock(AspectRetriever.class)); expected = "{\n" + " \"bool\" : {\n" @@ -132,7 +203,8 @@ public void testGetQueryBuilderFromCriterionIsNull() { new Criterion().setField("myTestField").setCondition(Condition.IS_NULL); QueryBuilder result = - ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); String expected = "{\n" + " \"bool\" : {\n" @@ -155,7 +227,9 @@ public void testGetQueryBuilderFromCriterionIsNull() { final Criterion timeseriesField = new Criterion().setField("myTestField").setCondition(Condition.IS_NULL); - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); + result = + ESUtils.getQueryBuilderFromCriterion( + timeseriesField, true, new HashMap<>(), mock(AspectRetriever.class)); expected = "{\n" + " \"bool\" : {\n" @@ -187,7 +261,8 @@ public void testGetQueryBuilderFromCriterionFieldToExpand() { // Ensure that the query is expanded! QueryBuilder result = - ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); String expected = "{\n" + " \"bool\" : {\n" @@ -225,7 +300,9 @@ public void testGetQueryBuilderFromCriterionFieldToExpand() { .setValues(new StringArray(ImmutableList.of("value1", "value2"))); // Ensure that the query is expanded without keyword. 
- result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); + result = + ESUtils.getQueryBuilderFromCriterion( + timeseriesField, true, new HashMap<>(), mock(AspectRetriever.class)); expected = "{\n" + " \"bool\" : {\n" @@ -268,15 +345,41 @@ public void testGetQueryBuilderFromStructPropEqualsValue() { .setValues(new StringArray(ImmutableList.of("value1"))); QueryBuilder result = - ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, false, new HashMap<>(), aspectRetriever); + String expected = + "{\n" + + " \"terms\" : {\n" + + " \"structuredProperties.ab_fgh_ten.keyword\" : [\n" + + " \"value1\"\n" + + " ],\n" + + " \"boost\" : 1.0,\n" + + " \"_name\" : \"structuredProperties.ab.fgh.ten\"\n" + + " }\n" + + "}"; + Assert.assertEquals(result.toString(), expected); + } + + @Test + public void testGetQueryBuilderFromStructPropEqualsValueV1() { + + final Criterion singleValueCriterion = + new Criterion() + .setField("structuredProperties.ab.fgh.ten") + .setCondition(Condition.EQUAL) + .setValues(new StringArray(ImmutableList.of("value1"))); + + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, false, new HashMap<>(), aspectRetrieverV1); String expected = "{\n" + " \"terms\" : {\n" - + " \"structuredProperties.ab_fgh_ten\" : [\n" + + " \"structuredProperties._versioned.ab_fgh_ten.00000000000001.string.keyword\" : [\n" + " \"value1\"\n" + " ],\n" + " \"boost\" : 1.0,\n" - + " \"_name\" : \"structuredProperties.ab_fgh_ten\"\n" + + " \"_name\" : \"structuredProperties.ab.fgh.ten\"\n" + " }\n" + "}"; Assert.assertEquals(result.toString(), expected); @@ -288,7 +391,8 @@ public void testGetQueryBuilderFromStructPropExists() { new Criterion().setField("structuredProperties.ab.fgh.ten").setCondition(Condition.EXISTS); QueryBuilder result = - ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new 
HashMap<>()); + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, false, new HashMap<>(), aspectRetriever); String expected = "{\n" + " \"bool\" : {\n" @@ -302,7 +406,59 @@ public void testGetQueryBuilderFromStructPropExists() { + " ],\n" + " \"adjust_pure_negative\" : true,\n" + " \"boost\" : 1.0,\n" - + " \"_name\" : \"structuredProperties.ab_fgh_ten\"\n" + + " \"_name\" : \"structuredProperties.ab.fgh.ten\"\n" + + " }\n" + + "}"; + Assert.assertEquals(result.toString(), expected); + + // No diff in the timeseries field case for this condition. + final Criterion timeseriesField = + new Criterion().setField("myTestField").setCondition(Condition.EXISTS); + + result = + ESUtils.getQueryBuilderFromCriterion( + timeseriesField, true, new HashMap<>(), aspectRetriever); + expected = + "{\n" + + " \"bool\" : {\n" + + " \"must\" : [\n" + + " {\n" + + " \"exists\" : {\n" + + " \"field\" : \"myTestField\",\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + " ],\n" + + " \"adjust_pure_negative\" : true,\n" + + " \"boost\" : 1.0,\n" + + " \"_name\" : \"myTestField\"\n" + + " }\n" + + "}"; + Assert.assertEquals(result.toString(), expected); + } + + @Test + public void testGetQueryBuilderFromStructPropExistsV1() { + final Criterion singleValueCriterion = + new Criterion().setField("structuredProperties.ab.fgh.ten").setCondition(Condition.EXISTS); + + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, false, new HashMap<>(), aspectRetrieverV1); + String expected = + "{\n" + + " \"bool\" : {\n" + + " \"must\" : [\n" + + " {\n" + + " \"exists\" : {\n" + + " \"field\" : \"structuredProperties._versioned.ab_fgh_ten.00000000000001.string\",\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + " ],\n" + + " \"adjust_pure_negative\" : true,\n" + + " \"boost\" : 1.0,\n" + + " \"_name\" : \"structuredProperties.ab.fgh.ten\"\n" + " }\n" + "}"; Assert.assertEquals(result.toString(), expected); @@ -311,7 +467,9 @@ public void 
testGetQueryBuilderFromStructPropExists() { final Criterion timeseriesField = new Criterion().setField("myTestField").setCondition(Condition.EXISTS); - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); + result = + ESUtils.getQueryBuilderFromCriterion( + timeseriesField, true, new HashMap<>(), aspectRetrieverV1); expected = "{\n" + " \"bool\" : {\n" diff --git a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java new file mode 100644 index 0000000000000..ab205d0463c4c --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java @@ -0,0 +1,193 @@ +package com.linkedin.metadata.structuredproperties.hooks; + +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_KEY_ASPECT_NAME; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.ArgumentMatchers.nullable; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; + +import com.linkedin.common.UrnArray; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.entity.Aspect; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.batch.MCPItem; +import com.linkedin.metadata.aspect.batch.PatchMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.entity.SearchRetriever; 
+import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.search.SearchEntity; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCL; +import io.datahubproject.metadata.context.RetrieverContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import jakarta.json.Json; +import jakarta.json.JsonPatch; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class PropertyDefinitionDeleteSideEffectTest { + private static final EntityRegistry TEST_REGISTRY = new TestEntityRegistry(); + private static final AspectPluginConfig TEST_PLUGIN_CONFIG = + AspectPluginConfig.builder() + .className(PropertyDefinitionDeleteSideEffect.class.getName()) + .enabled(true) + .supportedOperations(List.of("DELETE")) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName("structuredProperty") + .aspectName(STRUCTURED_PROPERTY_KEY_ASPECT_NAME) + .build(), + AspectPluginConfig.EntityAspectName.builder() + .entityName("structuredProperty") + .aspectName(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) + .build())) + .build(); + + private static final Urn TEST_PROPERTY_URN = + UrnUtils.getUrn("urn:li:structuredProperty:io.acryl.privacy.retentionTime"); + private static final 
StructuredPropertyDefinition TEST_PROPERTY_DEFINITION = + new StructuredPropertyDefinition() + .setValueType(UrnUtils.getUrn("urn:li:type:datahub.string")) + .setVersion("00000000000001") + .setEntityTypes( + new UrnArray(List.of(UrnUtils.getUrn("urn:li:entityType:datahub.dataset")))) + .setQualifiedName("io.acryl.privacy.retentionTime"); + private static final Urn TEST_DATASET_URN = + UrnUtils.getUrn( + "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.customers,PROD)"); + private AspectRetriever mockAspectRetriever; + private SearchRetriever mockSearchRetriever; + private RetrieverContext retrieverContext; + + @BeforeMethod + public void setup() { + mockAspectRetriever = mock(AspectRetriever.class); + when(mockAspectRetriever.getEntityRegistry()).thenReturn(TEST_REGISTRY); + when(mockAspectRetriever.getLatestAspectObject( + eq(TEST_PROPERTY_URN), eq(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME))) + .thenReturn(new Aspect(TEST_PROPERTY_DEFINITION.data())); + + mockSearchRetriever = mock(SearchRetriever.class); + ScrollResult scrollResult = new ScrollResult(); + scrollResult.setPageSize(1); + scrollResult.setNumEntities(1); + scrollResult.setEntities( + new SearchEntityArray(List.of(new SearchEntity().setEntity(TEST_DATASET_URN)))); + when(mockSearchRetriever.scroll( + eq(List.of("dataset")), eq(expectedFilter()), nullable(String.class), anyInt())) + .thenReturn(scrollResult); + + retrieverContext = + RetrieverContext.builder() + .searchRetriever(mockSearchRetriever) + .aspectRetriever(mockAspectRetriever) + .graphRetriever(TestOperationContexts.emptyGraphRetriever) + .build(); + } + + @Test + public void testDeletePropertyKey() { + PropertyDefinitionDeleteSideEffect test = new PropertyDefinitionDeleteSideEffect(); + test.setConfig(TEST_PLUGIN_CONFIG); + + List result = + test.postMCPSideEffect( + Set.of( + TestMCL.builder() + .changeType(ChangeType.DELETE) + .urn(TEST_PROPERTY_URN) + 
.entitySpec(TEST_REGISTRY.getEntitySpec("structuredProperty")) + .aspectSpec( + TEST_REGISTRY + .getEntitySpec("structuredProperty") + .getAspectSpec(STRUCTURED_PROPERTY_KEY_ASPECT_NAME)) + .build()), + retrieverContext) + .collect(Collectors.toList()); + + assertEquals(1, result.size()); + + verify(mockAspectRetriever, times(1)) + .getLatestAspectObject( + eq(TEST_PROPERTY_URN), eq(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)); + verify(mockSearchRetriever, times(1)) + .scroll(eq(List.of("dataset")), eq(expectedFilter()), nullable(String.class), anyInt()); + + JsonPatch expectedPatch = + Json.createPatchBuilder().remove("/properties/" + TEST_PROPERTY_URN).build(); + assertEquals(((PatchMCP) result.get(0)).getPatch(), expectedPatch); + } + + @Test + public void testDeletePropertyDefinition() { + PropertyDefinitionDeleteSideEffect test = new PropertyDefinitionDeleteSideEffect(); + test.setConfig(TEST_PLUGIN_CONFIG); + + List result = + test.postMCPSideEffect( + Set.of( + TestMCL.builder() + .changeType(ChangeType.DELETE) + .urn(TEST_PROPERTY_URN) + .entitySpec(TEST_REGISTRY.getEntitySpec("structuredProperty")) + .aspectSpec( + TEST_REGISTRY + .getEntitySpec("structuredProperty") + .getAspectSpec(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) + .previousRecordTemplate(TEST_PROPERTY_DEFINITION) + .build()), + retrieverContext) + .collect(Collectors.toList()); + + assertEquals(1, result.size()); + + verify(mockAspectRetriever, times(0)).getLatestAspectObject(any(), any()); + verify(mockAspectRetriever, times(0)).getLatestAspectObjects(any(), any()); + verify(mockSearchRetriever, times(1)) + .scroll(eq(List.of("dataset")), eq(expectedFilter()), nullable(String.class), anyInt()); + + JsonPatch expectedPatch = + Json.createPatchBuilder().remove("/properties/" + TEST_PROPERTY_URN).build(); + assertEquals(((PatchMCP) result.get(0)).getPatch(), expectedPatch); + } + + private static Filter expectedFilter() { + Filter propertyFilter = new Filter(); + final 
ConjunctiveCriterionArray disjunction = new ConjunctiveCriterionArray(); + final ConjunctiveCriterion conjunction = new ConjunctiveCriterion(); + final CriterionArray andCriterion = new CriterionArray(); + + final Criterion propertyExistsCriterion = new Criterion(); + propertyExistsCriterion.setField( + "structuredProperties._versioned.io_acryl_privacy_retentionTime.00000000000001.string"); + propertyExistsCriterion.setCondition(Condition.EXISTS); + + andCriterion.add(propertyExistsCriterion); + conjunction.setAnd(andCriterion); + disjunction.add(conjunction); + propertyFilter.setOr(disjunction); + + return propertyFilter; + } +} diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDeleteTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/StructuredPropertiesSoftDeleteTest.java similarity index 98% rename from entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDeleteTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/StructuredPropertiesSoftDeleteTest.java index e1e84f5728540..9109eeb7f96a5 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDeleteTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/StructuredPropertiesSoftDeleteTest.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.aspect.hooks; +package com.linkedin.metadata.structuredproperties.hooks; import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; import static org.mockito.Mockito.mock; diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/PropertyDefinitionValidatorTest.java similarity index 70% rename from 
entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/PropertyDefinitionValidatorTest.java index 841cbf5a77bec..22224f16f2210 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/PropertyDefinitionValidatorTest.java @@ -1,34 +1,27 @@ -package com.linkedin.metadata.aspect.validators; +package com.linkedin.metadata.structuredproperties.validators; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -import static org.testng.Assert.*; +import static org.testng.AssertJUnit.assertEquals; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.Constants; +import com.linkedin.data.template.SetMode; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.RetrieverContext; -import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; -import com.linkedin.metadata.aspect.validation.PropertyDefinitionValidator; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.structuredproperties.validation.PropertyDefinitionValidator; import com.linkedin.structured.PrimitivePropertyValue; import com.linkedin.structured.PropertyCardinality; import com.linkedin.structured.PropertyValue; import com.linkedin.structured.PropertyValueArray; import 
com.linkedin.structured.StructuredPropertyDefinition; -import com.linkedin.structured.StructuredPropertyKey; import com.linkedin.test.metadata.aspect.TestEntityRegistry; import com.linkedin.test.metadata.aspect.batch.TestMCP; import java.net.URISyntaxException; -import java.util.List; -import java.util.Set; import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; @@ -109,6 +102,7 @@ public void testCannotChangeMultipleToSingle() oldProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); StructuredPropertyDefinition newProperty = oldProperty.copy(); newProperty.setCardinality(PropertyCardinality.SINGLE); + newProperty.setVersion(null, SetMode.REMOVE_IF_NULL); assertEquals( PropertyDefinitionValidator.validateDefinitionUpserts( TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), @@ -117,6 +111,30 @@ public void testCannotChangeMultipleToSingle() 1); } + @Test + public void testCanChangeMultipleToSingleWithNewVersion() + throws URISyntaxException, CloneNotSupportedException { + StructuredPropertyDefinition oldProperty = new StructuredPropertyDefinition(); + oldProperty.setEntityTypes( + new UrnArray( + Urn.createFromString("urn:li:logicalEntity:dataset"), + Urn.createFromString("urn:li:logicalEntity:chart"), + Urn.createFromString("urn:li:logicalEntity:glossaryTerm"))); + oldProperty.setDisplayName("oldProp"); + oldProperty.setQualifiedName("prop3"); + oldProperty.setCardinality(PropertyCardinality.MULTIPLE); + oldProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); + StructuredPropertyDefinition newProperty = oldProperty.copy(); + newProperty.setCardinality(PropertyCardinality.SINGLE); + newProperty.setVersion("00000000000001"); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockRetrieverContext) + .count(), + 0); + } + @Test public void testCannotChangeValueType() throws 
URISyntaxException, CloneNotSupportedException { StructuredPropertyDefinition oldProperty = new StructuredPropertyDefinition(); @@ -131,6 +149,7 @@ public void testCannotChangeValueType() throws URISyntaxException, CloneNotSuppo oldProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); StructuredPropertyDefinition newProperty = oldProperty.copy(); newProperty.setValueType(Urn.createFromString("urn:li:logicalType:NUMBER")); + newProperty.setVersion(null, SetMode.REMOVE_IF_NULL); assertEquals( PropertyDefinitionValidator.validateDefinitionUpserts( TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), @@ -139,6 +158,30 @@ public void testCannotChangeValueType() throws URISyntaxException, CloneNotSuppo 1); } + @Test + public void testCanChangeValueTypeWithNewVersion() + throws URISyntaxException, CloneNotSupportedException { + StructuredPropertyDefinition oldProperty = new StructuredPropertyDefinition(); + oldProperty.setEntityTypes( + new UrnArray( + Urn.createFromString("urn:li:logicalEntity:dataset"), + Urn.createFromString("urn:li:logicalEntity:chart"), + Urn.createFromString("urn:li:logicalEntity:glossaryTerm"))); + oldProperty.setDisplayName("oldProp"); + oldProperty.setQualifiedName("prop3"); + oldProperty.setCardinality(PropertyCardinality.MULTIPLE); + oldProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); + StructuredPropertyDefinition newProperty = oldProperty.copy(); + newProperty.setValueType(Urn.createFromString("urn:li:logicalType:NUMBER")); + newProperty.setVersion("00000000000001"); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockRetrieverContext) + .count(), + 0); + } + @Test public void testCanChangeDisplayName() throws URISyntaxException, CloneNotSupportedException, AspectValidationException { @@ -185,6 +228,30 @@ public void testCannotChangeFullyQualifiedName() 1); } + @Test + 
public void testCannotChangeFullyQualifiedNameWithVersionChange() + throws URISyntaxException, CloneNotSupportedException { + StructuredPropertyDefinition oldProperty = new StructuredPropertyDefinition(); + oldProperty.setEntityTypes( + new UrnArray( + Urn.createFromString("urn:li:logicalEntity:dataset"), + Urn.createFromString("urn:li:logicalEntity:chart"), + Urn.createFromString("urn:li:logicalEntity:glossaryTerm"))); + oldProperty.setDisplayName("oldProp"); + oldProperty.setQualifiedName("prop3"); + oldProperty.setCardinality(PropertyCardinality.MULTIPLE); + oldProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); + StructuredPropertyDefinition newProperty = oldProperty.copy(); + newProperty.setQualifiedName("newProp"); + newProperty.setVersion("00000000000001"); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockRetrieverContext) + .count(), + 1); + } + @Test public void testCannotChangeRestrictAllowedValues() throws URISyntaxException, CloneNotSupportedException { @@ -203,6 +270,7 @@ public void testCannotChangeRestrictAllowedValues() PropertyValue allowedValue = new PropertyValue().setValue(PrimitivePropertyValue.create(1.0)).setDescription("hello"); newProperty.setAllowedValues(new PropertyValueArray(allowedValue)); + newProperty.setVersion(null, SetMode.REMOVE_IF_NULL); assertEquals( PropertyDefinitionValidator.validateDefinitionUpserts( TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), @@ -222,6 +290,46 @@ public void testCannotChangeRestrictAllowedValues() 1); } + @Test + public void testCanChangeRestrictAllowedValuesWithVersionChange() + throws URISyntaxException, CloneNotSupportedException { + // No constraint -> constraint case + StructuredPropertyDefinition oldProperty = new StructuredPropertyDefinition(); + oldProperty.setEntityTypes( + new UrnArray( + 
Urn.createFromString("urn:li:logicalEntity:dataset"), + Urn.createFromString("urn:li:logicalEntity:chart"), + Urn.createFromString("urn:li:logicalEntity:glossaryTerm"))); + oldProperty.setDisplayName("oldProp"); + oldProperty.setQualifiedName("prop3"); + oldProperty.setCardinality(PropertyCardinality.MULTIPLE); + oldProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); + + StructuredPropertyDefinition newProperty = oldProperty.copy(); + newProperty.setVersion("00000000000001"); + PropertyValue allowedValue = + new PropertyValue().setValue(PrimitivePropertyValue.create(1.0)).setDescription("hello"); + newProperty.setAllowedValues(new PropertyValueArray(allowedValue)); + + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockRetrieverContext) + .count(), + 0); + + // Remove allowed values from constraint case + PropertyValue oldAllowedValue = + new PropertyValue().setValue(PrimitivePropertyValue.create(3.0)).setDescription("hello"); + oldProperty.setAllowedValues((new PropertyValueArray(allowedValue, oldAllowedValue))); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockRetrieverContext) + .count(), + 0); + } + @Test public void testCanExpandAllowedValues() throws URISyntaxException, CloneNotSupportedException, AspectValidationException { @@ -289,61 +397,4 @@ public void testCanChangeAllowedValueDescriptions() .count(), 0); } - - @Test - public void testHardDeleteBlock() { - PropertyDefinitionValidator test = - new PropertyDefinitionValidator() - .setConfig( - AspectPluginConfig.builder() - .enabled(true) - .className(PropertyDefinitionValidator.class.getName()) - .supportedOperations(List.of("DELETE")) - .supportedEntityAspectNames( - List.of( - AspectPluginConfig.EntityAspectName.builder() - .entityName(STRUCTURED_PROPERTY_ENTITY_NAME) - 
.aspectName(Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) - .build(), - AspectPluginConfig.EntityAspectName.builder() - .entityName(STRUCTURED_PROPERTY_ENTITY_NAME) - .aspectName("structuredPropertyKey") - .build())) - .build()); - - assertEquals( - test.validateProposed( - Set.of( - TestMCP.builder() - .changeType(ChangeType.DELETE) - .urn(UrnUtils.getUrn("urn:li:structuredProperty:foo.bar")) - .entitySpec(entityRegistry.getEntitySpec("structuredProperty")) - .aspectSpec( - entityRegistry - .getEntitySpec(STRUCTURED_PROPERTY_ENTITY_NAME) - .getKeyAspectSpec()) - .recordTemplate(new StructuredPropertyKey()) - .build()), - mockRetrieverContext) - .count(), - 1); - - assertEquals( - test.validateProposed( - Set.of( - TestMCP.builder() - .changeType(ChangeType.DELETE) - .urn(UrnUtils.getUrn("urn:li:structuredProperty:foo.bar")) - .entitySpec(entityRegistry.getEntitySpec("structuredProperty")) - .aspectSpec( - entityRegistry - .getEntitySpec(STRUCTURED_PROPERTY_ENTITY_NAME) - .getAspectSpecMap() - .get(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) - .recordTemplate(new StructuredPropertyDefinition()) - .build()), - mockRetrieverContext) - .count(), - 1); - } } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/StructuredPropertiesValidatorTest.java similarity index 99% rename from entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/StructuredPropertiesValidatorTest.java index 77cf453f517be..7a2b8fd69f368 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/StructuredPropertiesValidatorTest.java @@ -1,4 +1,4 @@ 
-package com.linkedin.metadata.aspect.validators; +package com.linkedin.metadata.structuredproperties.validators; import static org.testng.Assert.assertEquals; @@ -7,8 +7,8 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; -import com.linkedin.metadata.aspect.validation.StructuredPropertiesValidator; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.structuredproperties.validation.StructuredPropertiesValidator; import com.linkedin.structured.PrimitivePropertyValue; import com.linkedin.structured.PrimitivePropertyValueArray; import com.linkedin.structured.PropertyValue; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java index 8643855162fa7..7067dd3a6763e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java @@ -11,6 +11,7 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; import com.linkedin.mxe.SystemMetadata; +import java.util.Collections; import java.util.List; import javax.annotation.Nonnull; import org.opensearch.client.RestHighLevelClient; @@ -38,7 +39,7 @@ public abstract class SystemMetadataServiceTestBase extends AbstractTestNGSpring @BeforeClass public void setup() { _client = buildService(); - _client.configure(); + _client.reindexAll(Collections.emptySet()); } @BeforeMethod diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java 
index 3ccd085cab9c0..b44f01d90dae4 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -58,6 +58,7 @@ import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; import java.util.Calendar; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.function.Function; @@ -128,7 +129,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { entityRegistry, new IndexConventionImpl("es_timeseries_aspect_service_test")); elasticSearchTimeseriesAspectService = buildService(); - elasticSearchTimeseriesAspectService.configure(); + elasticSearchTimeseriesAspectService.reindexAll(Collections.emptySet()); EntitySpec entitySpec = entityRegistry.getEntitySpec(ENTITY_NAME); aspectSpec = entitySpec.getAspectSpec(ASPECT_NAME); } diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java index 5da970b46afc7..28a4a2b00cd6f 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java @@ -40,6 +40,7 @@ import io.datahubproject.test.metadata.context.TestOperationContexts; import io.datahubproject.test.search.config.SearchCommonTestConfiguration; import java.io.IOException; +import java.util.Collections; import java.util.Map; import java.util.Optional; import java.util.Set; @@ -149,6 +150,7 @@ protected EntityIndexBuilders entityIndexBuildersHelper(OperationContext opConte Map.of(), true, false, + false, new ElasticSearchConfiguration(), gitVersion); SettingsBuilder settingsBuilder = new 
SettingsBuilder(null); @@ -252,7 +254,7 @@ public SearchService searchServiceHelper( ranker); // Build indices & write fixture data - indexBuilders.reindexAll(); + indexBuilders.reindexAll(Collections.emptySet()); FixtureReader.builder() .bulkProcessor(_bulkProcessor) diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java index 34598821f43fd..4cd818db34bf4 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java @@ -39,6 +39,7 @@ import io.datahubproject.test.search.config.SearchCommonTestConfiguration; import io.datahubproject.test.search.config.SearchTestContainerConfiguration; import java.io.IOException; +import java.util.Collections; import java.util.Map; import java.util.Optional; import javax.annotation.Nonnull; @@ -100,6 +101,7 @@ protected EntityIndexBuilders entityIndexBuilders( Map.of(), true, false, + false, new ElasticSearchConfiguration(), gitVersion); SettingsBuilder settingsBuilder = new SettingsBuilder(null); @@ -151,6 +153,7 @@ protected ESIndexBuilder esIndexBuilder() { Map.of(), true, true, + false, new ElasticSearchConfiguration(), gitVersion); } @@ -174,7 +177,7 @@ protected ElasticSearchGraphService graphService( indexConvention, GraphQueryConfiguration.testDefaults), indexBuilder); - graphService.configure(); + graphService.reindexAll(Collections.emptySet()); return graphService; } @@ -225,7 +228,7 @@ protected SearchService searchService( ranker); // Build indices - indexBuilders.reindexAll(); + indexBuilders.reindexAll(Collections.emptySet()); return service; } diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java 
b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java index f1fd371fdf7bd..a71c40b70f2b4 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java +++ b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java @@ -2,6 +2,7 @@ import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.AUTO_COMPLETE_ENTITY_TYPES; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.SEARCHABLE_ENTITY_TYPES; +import static org.mockito.Mockito.mock; import com.datahub.authentication.Authentication; import com.datahub.plugins.auth.authorization.Authorizer; @@ -13,6 +14,7 @@ import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.datahub.graphql.types.SearchableEntityType; import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.graph.LineageDirection; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.LineageSearchResult; @@ -183,7 +185,7 @@ public static LineageSearchResult lineage( .collect(Collectors.toList()), "*", hops, - ResolverUtils.buildFilter(filters, List.of()), + ResolverUtils.buildFilter(filters, List.of(), mock(AspectRetriever.class)), null, 0, 100); diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchTestContainerConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchTestContainerConfiguration.java index 0ddfd77399325..ab6644ce6ff6d 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchTestContainerConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchTestContainerConfiguration.java @@ -93,6 +93,7 @@ protected ESIndexBuilder getIndexBuilder( Map.of(), false, false, + false, new ElasticSearchConfiguration(), gitVersion); } diff --git 
a/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java b/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java index 35693ec96b3de..489c424f565dd 100644 --- a/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java +++ b/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java @@ -23,6 +23,7 @@ "com.linkedin.metadata.dao.producer", "com.linkedin.gms.factory.config", "com.linkedin.gms.factory.entity.update.indices", + "com.linkedin.gms.factory.search", "com.linkedin.gms.factory.entityclient", "com.linkedin.gms.factory.form", "com.linkedin.gms.factory.incident", diff --git a/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java b/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java index b3f81551c830a..47740b02d6166 100644 --- a/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java +++ b/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java @@ -153,7 +153,7 @@ private void testMCLOnInvalidCategory() throws Exception { // verify Mockito.verifyNoInteractions(opContext.getRetrieverContext().get().getGraphRetriever()); - Mockito.verifyNoInteractions(opContext.getRetrieverContext().get().getAspectRetriever()); + Mockito.verifyNoInteractions(opContext.getAspectRetrieverOpt().get()); Mockito.verifyNoInteractions(mockUpdateIndicesService); } @@ -232,7 +232,7 @@ private OperationContext mockOperationContextWithGraph(List graph TestOperationContexts.systemContextNoSearchAuthorization(mockRetrieverContext); // reset mock for test - reset(opContext.getRetrieverContext().get().getAspectRetriever()); + reset(opContext.getAspectRetrieverOpt().get()); if (!graphEdges.isEmpty()) { diff --git 
a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java index de9d3419c216e..af70b56f8ec11 100644 --- a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java @@ -4,6 +4,8 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.batch.MCLItem; +import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import java.util.Collection; @@ -33,6 +35,12 @@ protected Stream applyMCPSideEffect( }); } + @Override + protected Stream postMCPSideEffect( + Collection collection, @Nonnull RetrieverContext retrieverContext) { + return Stream.empty(); + } + @Nonnull @Override public AspectPluginConfig getConfig() { diff --git a/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl index 178d7b3cf4376..bf0bf65099b2e 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl @@ -78,5 +78,13 @@ record StructuredPropertyDefinition { "fieldType": "BOOLEAN" } immutable: boolean = false + + /** + * Definition version - Allows breaking schema changes. String is compared case-insensitive and new + * versions must be monotonically increasing. Cannot use periods/dots. 
+ * Suggestions: v1, v2 + * 20240610, 20240611 + */ + version: optional string } diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 1e8044e3b5f86..9c05c3d4851fc 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -581,17 +581,22 @@ entities: events: plugins: aspectPayloadValidators: - - className: 'com.linkedin.metadata.aspect.validation.PropertyDefinitionValidator' + - className: 'com.linkedin.metadata.structuredproperties.validation.PropertyDefinitionValidator' + packageScan: + - 'com.linkedin.metadata.structuredproperties.validation' enabled: true supportedOperations: + - CREATE + - CREATE_ENTITY - UPSERT - - DELETE supportedEntityAspectNames: - entityName: structuredProperty aspectName: propertyDefinition - entityName: structuredProperty aspectName: structuredPropertyKey - - className: 'com.linkedin.metadata.aspect.validation.StructuredPropertiesValidator' + - className: 'com.linkedin.metadata.structuredproperties.validation.StructuredPropertiesValidator' + packageScan: + - 'com.linkedin.metadata.structuredproperties.validation' enabled: true supportedOperations: - CREATE @@ -608,8 +613,22 @@ plugins: supportedEntityAspectNames: - entityName: '*' aspectName: '*' + mcpSideEffects: + - className: 'com.linkedin.metadata.structuredproperties.hooks.PropertyDefinitionDeleteSideEffect' + packageScan: + - 'com.linkedin.metadata.structuredproperties.hooks' + enabled: true + supportedOperations: + - DELETE + supportedEntityAspectNames: + - entityName: structuredProperty + aspectName: propertyDefinition + - entityName: structuredProperty + aspectName: structuredPropertyKey mutationHooks: - - className: 'com.linkedin.metadata.aspect.hooks.StructuredPropertiesSoftDelete' + - className: 'com.linkedin.metadata.structuredproperties.hooks.StructuredPropertiesSoftDelete' + packageScan: + - 
'com.linkedin.metadata.structuredproperties.hooks' enabled: true supportedEntityAspectNames: - entityName: '*' diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java index 56247d61337e8..9928318268a3e 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java @@ -7,6 +7,7 @@ import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.LineageFlags; import com.linkedin.metadata.query.SearchFlags; @@ -285,6 +286,15 @@ public Optional getRetrieverContext() { return Optional.ofNullable(retrieverContext); } + @Nullable + public AspectRetriever getAspectRetriever() { + return getAspectRetrieverOpt().orElse(null); + } + + public Optional getAspectRetrieverOpt() { + return getRetrieverContext().map(RetrieverContext::getAspectRetriever); + } + /** * Return a unique id for this context. Typically useful for building cache keys. 
We combine the * different context components to create a single string representation of the hashcode across diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java index 0d7e6359b771c..9337fbfe3bb00 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java @@ -2,6 +2,7 @@ import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.entity.SearchRetriever; import java.util.Optional; import javax.annotation.Nonnull; import lombok.Builder; @@ -14,6 +15,7 @@ public class RetrieverContext @Nonnull private final GraphRetriever graphRetriever; @Nonnull private final AspectRetriever aspectRetriever; + @Nonnull private final SearchRetriever searchRetriever; @Override public Optional getCacheKeyComponent() { diff --git a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java index 4c28ba037d3a1..0e8c165468a4b 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java @@ -11,6 +11,7 @@ import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; +import com.linkedin.metadata.entity.SearchRetriever; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; import 
com.linkedin.metadata.models.registry.EntityRegistryException; @@ -19,6 +20,8 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.RelationshipFilter; import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.snapshot.Snapshot; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; @@ -55,7 +58,7 @@ public class TestOperationContexts { private static EntityRegistry defaultEntityRegistryInstance; - private static EntityRegistry defaultEntityRegistry() { + public static EntityRegistry defaultEntityRegistry() { if (defaultEntityRegistryInstance == null) { PathSpecBasedSchemaAnnotationVisitor.class .getClassLoader() @@ -84,6 +87,7 @@ public static AspectRetriever emptyAspectRetriever( } public static GraphRetriever emptyGraphRetriever = new EmptyGraphRetriever(); + public static SearchRetriever emptySearchRetriever = new EmptySearchRetriever(); public static RetrieverContext emptyRetrieverContext( @Nullable Supplier entityRegistrySupplier) { @@ -91,6 +95,7 @@ public static RetrieverContext emptyRetrieverContext( return RetrieverContext.builder() .aspectRetriever(emptyAspectRetriever(entityRegistrySupplier)) .graphRetriever(emptyGraphRetriever) + .searchRetriever(emptySearchRetriever) .build(); } @@ -121,6 +126,20 @@ public static OperationContext systemContextNoSearchAuthorization( null); } + public static OperationContext systemContextNoSearchAuthorization( + @Nullable AspectRetriever aspectRetriever) { + RetrieverContext retrieverContext = + RetrieverContext.builder() + .aspectRetriever(aspectRetriever) + .graphRetriever(emptyGraphRetriever) + .searchRetriever(emptySearchRetriever) + .build(); + return systemContextNoSearchAuthorization( + () -> retrieverContext.getAspectRetriever().getEntityRegistry(), + () -> 
retrieverContext, + null); + } + public static OperationContext systemContextNoSearchAuthorization( @Nullable Supplier retrieverContextSupplier, @Nullable IndexConvention indexConvention) { @@ -277,5 +296,21 @@ public RelatedEntitiesScrollResult scrollRelatedEntities( } } + public static class EmptySearchRetriever implements SearchRetriever { + + @Override + public ScrollResult scroll( + @Nonnull List entities, + @Nullable Filter filters, + @Nullable String scrollId, + int count) { + ScrollResult empty = new ScrollResult(); + empty.setEntities(new SearchEntityArray()); + empty.setNumEntities(0); + empty.setPageSize(0); + return empty; + } + } + private TestOperationContexts() {} } diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authentication/token/StatefulTokenServiceTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authentication/token/StatefulTokenServiceTest.java index bcea100ec733f..d8b7c23153537 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authentication/token/StatefulTokenServiceTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authentication/token/StatefulTokenServiceTest.java @@ -203,7 +203,8 @@ public void generateRevokeToken() throws TokenException { Mockito.when(mockService.exists(any(OperationContext.class), any(Urn.class), eq(true))) .thenReturn(true); - final RollbackRunResult result = new RollbackRunResult(ImmutableList.of(), 0); + final RollbackRunResult result = + new RollbackRunResult(ImmutableList.of(), 0, ImmutableList.of()); Mockito.when(mockService.deleteUrn(any(OperationContext.class), any(Urn.class))) .thenReturn(result); diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java index bb59949aa44bb..6870d10c87001 100644 --- 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java @@ -9,6 +9,8 @@ import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceAspectRetriever; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.SearchService; +import com.linkedin.metadata.search.SearchServiceSearchRetriever; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.OperationContextConfig; import io.datahubproject.metadata.context.RetrieverContext; @@ -42,7 +44,8 @@ protected OperationContext javaSystemOperationContext( @Nonnull final EntityRegistry entityRegistry, @Nonnull final EntityService entityService, @Nonnull final RestrictedService restrictedService, - @Nonnull final GraphRetriever graphRetriever) { + @Nonnull final GraphRetriever graphRetriever, + @Nonnull final SearchService searchService) { EntityServiceAspectRetriever entityServiceAspectRetriever = EntityServiceAspectRetriever.builder() @@ -50,6 +53,9 @@ protected OperationContext javaSystemOperationContext( .entityService(entityService) .build(); + SearchServiceSearchRetriever searchServiceSearchRetriever = + SearchServiceSearchRetriever.builder().searchService(searchService).build(); + OperationContext systemOperationContext = OperationContext.asSystem( operationContextConfig, @@ -60,15 +66,17 @@ protected OperationContext javaSystemOperationContext( RetrieverContext.builder() .aspectRetriever(entityServiceAspectRetriever) .graphRetriever(graphRetriever) + .searchRetriever(searchServiceSearchRetriever) .build()); entityServiceAspectRetriever.setSystemOperationContext(systemOperationContext); + searchServiceSearchRetriever.setSystemOperationContext(systemOperationContext); return systemOperationContext; } /** - * Used outside of GMS + 
* Used outside GMS * *

Entity Client and Aspect Retriever implemented by Restli call to GMS Entity Client and * Aspect Retriever client-side caching enabled @@ -82,11 +90,15 @@ protected OperationContext restliSystemOperationContext( @Nonnull @Qualifier("systemAuthentication") final Authentication systemAuthentication, @Nonnull final OperationContextConfig operationContextConfig, @Nonnull final RestrictedService restrictedService, - @Nonnull final GraphRetriever graphRetriever) { + @Nonnull final GraphRetriever graphRetriever, + @Nonnull final SearchService searchService) { EntityClientAspectRetriever entityServiceAspectRetriever = EntityClientAspectRetriever.builder().entityClient(systemEntityClient).build(); + SearchServiceSearchRetriever searchServiceSearchRetriever = + SearchServiceSearchRetriever.builder().searchService(searchService).build(); + OperationContext systemOperationContext = OperationContext.asSystem( operationContextConfig, @@ -97,9 +109,11 @@ protected OperationContext restliSystemOperationContext( RetrieverContext.builder() .aspectRetriever(entityServiceAspectRetriever) .graphRetriever(graphRetriever) + .searchRetriever(searchServiceSearchRetriever) .build()); entityServiceAspectRetriever.setSystemOperationContext(systemOperationContext); + searchServiceSearchRetriever.setSystemOperationContext(systemOperationContext); return systemOperationContext; } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java index ef7e8a8c6a8ac..4513d9f065dcb 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java @@ -57,6 +57,9 @@ public class ElasticSearchIndexBuilderFactory { @Value("#{new 
Boolean('${elasticsearch.index.enableMappingsReindex}')}") private boolean enableMappingsReindex; + @Value("#{new Boolean('${structuredProperties.systemUpdateEnabled}')}") + private boolean enableStructuredPropertiesReindex; + @Bean(name = "elasticSearchIndexSettingsOverrides") @Nonnull protected Map> getIndexSettingsOverrides( @@ -85,6 +88,7 @@ protected ESIndexBuilder getInstance( overrides, enableSettingsReindex, enableMappingsReindex, + enableStructuredPropertiesReindex, configurationProvider.getElasticSearch(), gitVersion); } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java index f242106e3fe4b..638d1a6fd0c8a 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java @@ -84,7 +84,7 @@ public void execute(@Nonnull OperationContext systemOperationContext) throws Exc .aspectName(DATA_PLATFORM_INSTANCE_ASPECT_NAME) .recordTemplate(dataPlatformInstance.get()) .auditStamp(aspectAuditStamp) - .build(systemOperationContext.getRetrieverContext().get().getAspectRetriever())); + .build(systemOperationContext.getAspectRetrieverOpt().get())); } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java index 648fd0f585319..bfd45f0a52df2 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java @@ -14,6 +14,7 @@ import 
com.datahub.util.RecordUtils; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.data.ByteString; import com.linkedin.data.template.RecordTemplate; @@ -133,7 +134,8 @@ protected abstract AspectsBatch toMCPBatch( @Operation(summary = "Scroll entities") public ResponseEntity getEntities( @PathVariable("entityName") String entityName, - @RequestParam(value = "aspectNames", defaultValue = "") Set aspectNames, + @RequestParam(value = "aspectNames", defaultValue = "") Set aspects1, + @RequestParam(value = "aspects", defaultValue = "") Set aspects2, @RequestParam(value = "count", defaultValue = "10") Integer count, @RequestParam(value = "query", defaultValue = "*") String query, @RequestParam(value = "scrollId", required = false) String scrollId, @@ -187,7 +189,7 @@ public ResponseEntity getEntities( buildScrollResult( opContext, result.getEntities(), - aspectNames, + ImmutableSet.builder().addAll(aspects1).addAll(aspects2).build(), withSystemMetadata, result.getScrollId())); } @@ -199,7 +201,8 @@ public ResponseEntity getEntities( public ResponseEntity getEntity( @PathVariable("entityName") String entityName, @PathVariable("entityUrn") String entityUrn, - @RequestParam(value = "aspectNames", defaultValue = "") Set aspectNames, + @RequestParam(value = "aspectNames", defaultValue = "") Set aspects1, + @RequestParam(value = "aspects", defaultValue = "") Set aspects2, @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") Boolean withSystemMetadata) throws URISyntaxException { @@ -219,7 +222,12 @@ public ResponseEntity getEntity( authentication, true); - return buildEntityList(opContext, List.of(urn), aspectNames, withSystemMetadata).stream() + return buildEntityList( + opContext, + List.of(urn), + ImmutableSet.builder().addAll(aspects1).addAll(aspects2).build(), + withSystemMetadata) + 
.stream() .findFirst() .map(ResponseEntity::ok) .orElse(ResponseEntity.notFound().header(NOT_FOUND_HEADER, "ENTITY").build()); @@ -352,7 +360,7 @@ public void deleteEntity( authentication, true); - entityService.deleteAspect(opContext, entityUrn, entitySpec.getKeyAspectName(), Map.of(), true); + entityService.deleteUrn(opContext, urn); } @Tag(name = "Generic Entities") diff --git a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java index ed0e8c0858526..9872f45648d7b 100644 --- a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java +++ b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java @@ -97,7 +97,7 @@ public void testAsyncDefaultAspects() throws URISyntaxException { .recordTemplate(mcp.getAspect()) .auditStamp(new AuditStamp()) .metadataChangeProposal(mcp) - .build(opContext.getRetrieverContext().get().getAspectRetriever()); + .build(opContext.getAspectRetrieverOpt().get()); when(aspectDao.runInTransactionWithRetry(any(), any(), anyInt())) .thenReturn( List.of(List.of( diff --git a/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java b/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java index d220883d24132..aaf90d279e0bd 100644 --- a/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java +++ b/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java @@ -40,9 +40,6 @@ public MockTimeseriesAspectService(long count, long filteredCount, String taskId this._taskId = taskId; } - @Override - public void configure() {} - @Override public long countByFilter( @Nonnull OperationContext operationContext, diff --git 
a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java index 27358c4c0e279..ac4c6895b757b 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -443,12 +443,18 @@ void ingestEntity( void setRetentionService(RetentionService retentionService); - RollbackResult deleteAspect( + default RollbackResult deleteAspect( @Nonnull OperationContext opContext, String urn, String aspectName, @Nonnull Map conditions, - boolean hardDelete); + boolean hardDelete) { + AspectRowSummary aspectRowSummary = + new AspectRowSummary().setUrn(urn).setAspectName(aspectName); + return rollbackWithConditions(opContext, List.of(aspectRowSummary), conditions, hardDelete) + .getRollbackResults() + .get(0); + } RollbackRunResult deleteUrn(@Nonnull OperationContext opContext, Urn urn); diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackResult.java index 9955a58c65339..143d6102f2523 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackResult.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackResult.java @@ -1,8 +1,13 @@ package com.linkedin.metadata.entity; +import static com.linkedin.metadata.utils.PegasusUtils.constructMCL; +import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName; + +import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; +import com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.SystemMetadata; import lombok.Value; @@ -18,4 +23,22 @@ public class RollbackResult { public 
ChangeType changeType; public Boolean keyAffected; public Integer additionalRowsAffected; + + public boolean isNoOp() { + return oldValue == newValue; + } + + public MetadataChangeLog toMCL(AuditStamp auditStamp) { + return constructMCL( + null, + urnToEntityName(urn), + urn, + changeType, + aspectName, + auditStamp, + newValue, + newSystemMetadata, + oldValue, + oldSystemMetadata); + } } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackRunResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackRunResult.java index a8c558df77e57..68ee66c2b50ed 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackRunResult.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackRunResult.java @@ -8,4 +8,5 @@ public class RollbackRunResult { public List rowsRolledBack; public Integer rowsDeletedFromEntityDeletion; + public List rollbackResults; } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java index e85e0567f963b..608a55674d859 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java @@ -1,12 +1,19 @@ package com.linkedin.metadata.entity; +import static com.linkedin.metadata.utils.PegasusUtils.constructMCL; +import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName; + import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.mxe.MetadataAuditOperation; +import com.linkedin.mxe.MetadataChangeLog; +import com.linkedin.mxe.MetadataChangeProposal; 
import com.linkedin.mxe.SystemMetadata; import java.util.concurrent.Future; +import javax.annotation.Nullable; import lombok.Builder; import lombok.Value; @@ -15,13 +22,35 @@ public class UpdateAspectResult { Urn urn; ChangeMCP request; - RecordTemplate oldValue; + @Nullable RecordTemplate oldValue; RecordTemplate newValue; - SystemMetadata oldSystemMetadata; + @Nullable SystemMetadata oldSystemMetadata; SystemMetadata newSystemMetadata; MetadataAuditOperation operation; AuditStamp auditStamp; long maxVersion; + @Nullable MetadataChangeProposal mcp; + /* + Whether the MCL was written to Elasticsearch prior to emitting the MCL + */ boolean processedMCL; Future mclFuture; + + public boolean isNoOp() { + return oldValue == newValue; + } + + public MetadataChangeLog toMCL() { + return constructMCL( + request.getMetadataChangeProposal(), + urnToEntityName(urn), + urn, + isNoOp() ? ChangeType.RESTATE : ChangeType.UPSERT, + request.getAspectName(), + auditStamp, + newValue, + newSystemMetadata, + oldValue, + oldSystemMetadata); + } } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/graph/GraphService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/graph/GraphService.java index ad8fdbbb06895..16305d6ec4bcf 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/graph/GraphService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/graph/GraphService.java @@ -314,7 +314,7 @@ void removeEdgesFromNode( @Nonnull final List relationshipTypes, @Nonnull final RelationshipFilter relationshipFilter); - void configure(); + default void configure() {} /** Removes all edges and nodes from the graph. 
*/ void clear(); diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java index a253f9ffc2531..a3db4b029b68b 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java @@ -15,7 +15,7 @@ public interface EntitySearchService { - void configure(); + default void configure() {} /** Clear all data within the service */ void clear(@Nonnull OperationContext opContext); diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java index f06671ac3c314..3880ad1d8da11 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java @@ -40,7 +40,7 @@ List findByRegistry( List listRuns( Integer pageOffset, Integer pageSize, boolean includeSoftDeleted); - void configure(); + default void configure() {} void clear(); } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java index accc0c8ceeb3b..6b1f484ac0a51 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java @@ -18,7 +18,7 @@ public interface TimeseriesAspectService { /** Configure the Time-Series aspect service one time at boot-up. 
*/ - void configure(); + default void configure() {} /** * Count the number of entries using a filter diff --git a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java index ffa0d600a2351..07f91bb52fe10 100644 --- a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java +++ b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java @@ -85,8 +85,7 @@ private void writeSearchCsv(WebApplicationContext ctx, PrintWriter pw) { entitySpecOpt -> { EntitySpec entitySpec = entitySpecOpt.get(); SearchRequest searchRequest = - SearchRequestHandler.getBuilder( - entitySpec, searchConfiguration, null, aspectRetriever) + SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, null) .getSearchRequest( getOperationContext(ctx) .withSearchFlags( diff --git a/smoke-test/tests/structured_properties/test_structured_properties.py b/smoke-test/tests/structured_properties/test_structured_properties.py index 44beab930091d..bf1b5b1292750 100644 --- a/smoke-test/tests/structured_properties/test_structured_properties.py +++ b/smoke-test/tests/structured_properties/test_structured_properties.py @@ -176,6 +176,15 @@ def get_property_from_entity( return None +def to_es_name(property_name=None, namespace=default_namespace, qualified_name=None): + if property_name: + namespace_field = namespace.replace(".", "_") + return f"structuredProperties.{namespace_field}_{property_name}" + else: + escaped_qualified_name = qualified_name.replace(".", "_") + return f"structuredProperties.{escaped_qualified_name}" + + # @tenacity.retry( # stop=tenacity.stop_after_attempt(sleep_times), # wait=tenacity.wait_fixed(sleep_sec), @@ -406,10 +415,6 @@ def test_dataset_yaml_loader(ingest_cleanup_data, graph): def test_structured_property_search(ingest_cleanup_data, graph: DataHubGraph, caplog): - def to_es_name(property_name, 
namespace=default_namespace): - namespace_field = namespace.replace(".", "_") - return f"structuredProperties.{namespace_field}_{property_name}" - # Attach structured property to entity and to field field_property_name = f"deprecationDate{randint(10, 10000)}" @@ -573,28 +578,6 @@ def patch_one(prop_name, prop_value): assert actual_property_values == [property_value_other] -def test_dataset_structured_property_hard_delete(ingest_cleanup_data, graph, caplog): - property_name = f"hardDeleteTest{randint(10, 10000)}Property" - value_type = "string" - property_urn = f"urn:li:structuredProperty:{default_namespace}.{property_name}" - - create_property_definition( - property_name=property_name, graph=graph, value_type=value_type - ) - - test_property = StructuredProperties.from_datahub(graph=graph, urn=property_urn) - assert test_property is not None - - try: - graph.hard_delete_entity(urn=property_urn) - raise AssertionError("Should not be able to HARD delete structured property") - except Exception as e: - if "Hard delete of Structured Property Definitions is not supported" in str(e): - pass - else: - raise e - - def test_dataset_structured_property_soft_delete_validation( ingest_cleanup_data, graph, caplog ): @@ -685,10 +668,6 @@ def test_dataset_structured_property_soft_delete_read_mutation( def test_dataset_structured_property_soft_delete_search_filter_validation( ingest_cleanup_data, graph, caplog ): - def to_es_name(property_name, namespace=default_namespace): - namespace_field = namespace.replace(".", "_") - return f"structuredProperties.{namespace_field}_{property_name}" - # Create a test structured property dataset_property_name = f"softDeleteSearchFilter{randint(10, 10000)}" property_value = 30 @@ -744,3 +723,97 @@ def to_es_name(property_name, namespace=default_namespace): pass else: raise e + + +def test_dataset_structured_property_delete(ingest_cleanup_data, graph, caplog): + # Create property, assign value to target dataset urn + def 
create_property(target_dataset, prop_value): + property_name = f"hardDeleteTest{randint(10, 10000)}Property" + value_type = "string" + property_urn = f"urn:li:structuredProperty:{default_namespace}.{property_name}" + + create_property_definition( + property_name=property_name, + graph=graph, + value_type=value_type, + cardinality="SINGLE", + ) + + test_property = StructuredProperties.from_datahub(graph=graph, urn=property_urn) + assert test_property is not None + + # assign + dataset_patcher: DatasetPatchBuilder = DatasetPatchBuilder(urn=target_dataset) + dataset_patcher.set_structured_property( + StructuredPropertyUrn.make_structured_property_urn(property_urn), + prop_value, + ) + for mcp in dataset_patcher.build(): + graph.emit(mcp) + + return test_property + + # create and assign 2 structured properties with values + property1 = create_property(dataset_urns[0], "foo") + property2 = create_property(dataset_urns[0], "bar") + wait_for_writes_to_sync() + + # validate #1 & #2 properties assigned + assert get_property_from_entity( + dataset_urns[0], + property1.qualified_name, + graph=graph, + ) == ["foo"] + assert get_property_from_entity( + dataset_urns[0], + property2.qualified_name, + graph=graph, + ) == ["bar"] + + def validate_search(qualified_name, expected): + entity_urns = list( + graph.get_urns_by_filter( + extraFilters=[ + { + "field": to_es_name(qualified_name=qualified_name), + "negated": "false", + "condition": "EXISTS", + } + ] + ) + ) + assert entity_urns == expected + + # Validate search works for property #1 & #2 + validate_search(property1.qualified_name, expected=[dataset_urns[0]]) + validate_search(property2.qualified_name, expected=[dataset_urns[0]]) + + # delete the structured property #1 + graph.hard_delete_entity(urn=property1.urn) + wait_for_writes_to_sync() + + # validate property #1 deleted and property #2 remains + assert ( + get_property_from_entity( + dataset_urns[0], + property1.qualified_name, + graph=graph, + ) + is None + ) + assert 
get_property_from_entity( + dataset_urns[0], + property2.qualified_name, + graph=graph, + ) == ["bar"] + + # assert property 1 definition was removed + property1_definition = graph.get_aspect( + property1.urn, StructuredPropertyDefinitionClass + ) + assert property1_definition is None + + wait_for_writes_to_sync() + # Validate search works for property #1 & #2 + validate_search(property1.qualified_name, expected=[]) + validate_search(property2.qualified_name, expected=[dataset_urns[0]]) From 2d727a960b2370c3f4be7d80c0e2b6790580dd48 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 17 Jun 2024 22:37:39 -0700 Subject: [PATCH 6/6] feat(ingest/snowflake): support more than 10k views in a db (#10718) --- .../source/snowflake/snowflake_query.py | 60 +++++-------- .../source/snowflake/snowflake_report.py | 1 - .../source/snowflake/snowflake_schema.py | 86 ++++++++++--------- .../source/snowflake/snowflake_schema_gen.py | 6 -- .../tests/integration/snowflake/common.py | 5 +- .../snowflake/test_snowflake_failures.py | 2 +- 6 files changed, 69 insertions(+), 91 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py index 8187fce78e5e4..b3eb23b25e0a3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py @@ -5,6 +5,8 @@ from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain from datahub.ingestion.source.snowflake.snowflake_config import DEFAULT_TABLES_DENY_LIST +SHOW_VIEWS_MAX_PAGE_SIZE = 10000 + def create_deny_regex_sql_filter( deny_pattern: List[str], filter_cols: List[str] @@ -202,48 +204,28 @@ def get_tags_on_columns_with_propagation( FROM table("{db_name}".information_schema.tag_references_all_columns('{quoted_table_identifier}', '{SnowflakeObjectDomain.TABLE}')); """ - # View definition is retrived 
in information_schema query only if role is owner of view. Hence this query is not used. - # https://community.snowflake.com/s/article/Is-it-possible-to-see-the-view-definition-in-information-schema-views-from-a-non-owner-role @staticmethod - def views_for_database(db_name: Optional[str]) -> str: - db_clause = f'"{db_name}".' if db_name is not None else "" - return f""" - SELECT table_catalog AS "TABLE_CATALOG", - table_schema AS "TABLE_SCHEMA", - table_name AS "TABLE_NAME", - created AS "CREATED", - last_altered AS "LAST_ALTERED", - comment AS "COMMENT", - view_definition AS "VIEW_DEFINITION" - FROM {db_clause}information_schema.views t - WHERE table_schema != 'INFORMATION_SCHEMA' - order by table_schema, table_name""" - - # View definition is retrived in information_schema query only if role is owner of view. Hence this query is not used. - # https://community.snowflake.com/s/article/Is-it-possible-to-see-the-view-definition-in-information-schema-views-from-a-non-owner-role - @staticmethod - def views_for_schema(schema_name: str, db_name: Optional[str]) -> str: - db_clause = f'"{db_name}".' if db_name is not None else "" - return f""" - SELECT table_catalog AS "TABLE_CATALOG", - table_schema AS "TABLE_SCHEMA", - table_name AS "TABLE_NAME", - created AS "CREATED", - last_altered AS "LAST_ALTERED", - comment AS "COMMENT", - view_definition AS "VIEW_DEFINITION" - FROM {db_clause}information_schema.views t - where table_schema='{schema_name}' - order by table_schema, table_name""" + def show_views_for_database( + db_name: str, + limit: int = SHOW_VIEWS_MAX_PAGE_SIZE, + view_pagination_marker: Optional[str] = None, + ) -> str: + # While there is an information_schema.views view, that only shows the view definition if the role + # is an owner of the view. That doesn't work for us. 
+ # https://community.snowflake.com/s/article/Is-it-possible-to-see-the-view-definition-in-information-schema-views-from-a-non-owner-role - @staticmethod - def show_views_for_database(db_name: str) -> str: - return f"""show views in database "{db_name}";""" + # SHOW VIEWS can return a maximum of 10000 rows. + # https://docs.snowflake.com/en/sql-reference/sql/show-views#usage-notes + assert limit <= SHOW_VIEWS_MAX_PAGE_SIZE - @staticmethod - def show_views_for_schema(schema_name: str, db_name: Optional[str]) -> str: - db_clause = f'"{db_name}".' if db_name is not None else "" - return f"""show views in schema {db_clause}"{schema_name}";""" + # To work around this, we paginate through the results using the FROM clause. + from_clause = ( + f"""FROM '{view_pagination_marker}'""" if view_pagination_marker else "" + ) + return f"""\ +SHOW VIEWS IN DATABASE "{db_name}" +LIMIT {limit} {from_clause}; +""" @staticmethod def columns_for_schema(schema_name: str, db_name: Optional[str]) -> str: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py index db2095da01134..d84580a94ab4e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py @@ -117,7 +117,6 @@ class SnowflakeV2Report( # "Information schema query returned too much data. 
Please repeat query with more selective predicates."" # This will result in overall increase in time complexity num_get_tables_for_schema_queries: int = 0 - num_get_views_for_schema_queries: int = 0 num_get_columns_for_table_queries: int = 0 # these will be non-zero if the user choses to enable the extract_tags = "with_lineage" option, which requires diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py index 3e26d2acd78e1..3254224e437a6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py @@ -9,7 +9,10 @@ from datahub.ingestion.api.report import SupportsAsObj from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain -from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery +from datahub.ingestion.source.snowflake.snowflake_query import ( + SHOW_VIEWS_MAX_PAGE_SIZE, + SnowflakeQuery, +) from datahub.ingestion.source.snowflake.snowflake_utils import SnowflakeQueryMixin from datahub.ingestion.source.sql.sql_generic import BaseColumn, BaseTable, BaseView from datahub.utilities.serialized_lru_cache import serialized_lru_cache @@ -324,53 +327,54 @@ def get_tables_for_schema( return tables @serialized_lru_cache(maxsize=1) - def get_views_for_database( - self, db_name: str - ) -> Optional[Dict[str, List[SnowflakeView]]]: + def get_views_for_database(self, db_name: str) -> Dict[str, List[SnowflakeView]]: + page_limit = SHOW_VIEWS_MAX_PAGE_SIZE + views: Dict[str, List[SnowflakeView]] = {} - try: - cur = self.query(SnowflakeQuery.show_views_for_database(db_name)) - except Exception as e: - logger.debug( - f"Failed to get all views for database - {db_name}", exc_info=e - ) - # Error - Information schema query returned too much data. Please repeat query with more selective predicates. 
- return None - for table in cur: - if table["schema_name"] not in views: - views[table["schema_name"]] = [] - views[table["schema_name"]].append( - SnowflakeView( - name=table["name"], - created=table["created_on"], - # last_altered=table["last_altered"], - comment=table["comment"], - view_definition=table["text"], - last_altered=table["created_on"], - materialized=table.get("is_materialized", "false").lower() - == "true", + first_iteration = True + view_pagination_marker: Optional[str] = None + while first_iteration or view_pagination_marker is not None: + cur = self.query( + SnowflakeQuery.show_views_for_database( + db_name, + limit=page_limit, + view_pagination_marker=view_pagination_marker, ) ) - return views - def get_views_for_schema( - self, schema_name: str, db_name: str - ) -> List[SnowflakeView]: - views: List[SnowflakeView] = [] + first_iteration = False + view_pagination_marker = None + + result_set_size = 0 + for view in cur: + result_set_size += 1 + + view_name = view["name"] + schema_name = view["schema_name"] + if schema_name not in views: + views[schema_name] = [] + views[schema_name].append( + SnowflakeView( + name=view_name, + created=view["created_on"], + # last_altered=table["last_altered"], + comment=view["comment"], + view_definition=view["text"], + last_altered=view["created_on"], + materialized=( + view.get("is_materialized", "false").lower() == "true" + ), + ) + ) - cur = self.query(SnowflakeQuery.show_views_for_schema(schema_name, db_name)) - for table in cur: - views.append( - SnowflakeView( - name=table["name"], - created=table["created_on"], - # last_altered=table["last_altered"], - comment=table["comment"], - view_definition=table["text"], - last_altered=table["created_on"], + if result_set_size >= page_limit: + # If we hit the limit, we need to send another request to get the next page. 
+ logger.info( + f"Fetching next page of views for {db_name} - after {view_name}" ) - ) + view_pagination_marker = view_name + return views @serialized_lru_cache(maxsize=SCHEMA_PARALLELISM) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py index 5a4e37078dd75..920cf741770c3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py @@ -1011,12 +1011,6 @@ def get_views_for_schema( ) -> List[SnowflakeView]: views = self.data_dictionary.get_views_for_database(db_name) - # get all views for database failed, - # falling back to get views for schema - if views is None: - self.report.num_get_views_for_schema_queries += 1 - return self.data_dictionary.get_views_for_schema(schema_name, db_name) - # Some schema may not have any table return views.get(schema_name, []) diff --git a/metadata-ingestion/tests/integration/snowflake/common.py b/metadata-ingestion/tests/integration/snowflake/common.py index 30c4b2bec3a04..ea08a94267480 100644 --- a/metadata-ingestion/tests/integration/snowflake/common.py +++ b/metadata-ingestion/tests/integration/snowflake/common.py @@ -224,8 +224,6 @@ def default_query_results( # noqa: C901 ] elif query == SnowflakeQuery.tables_for_database("TEST_DB"): raise Exception("Information schema query returned too much data") - elif query == SnowflakeQuery.show_views_for_database("TEST_DB"): - raise Exception("Information schema query returned too much data") elif query == SnowflakeQuery.tables_for_schema("TEST_SCHEMA", "TEST_DB"): return [ { @@ -241,7 +239,8 @@ def default_query_results( # noqa: C901 } for tbl_idx in range(1, num_tables + 1) ] - elif query == SnowflakeQuery.show_views_for_schema("TEST_SCHEMA", "TEST_DB"): + elif query == SnowflakeQuery.show_views_for_database("TEST_DB"): + # TODO: Add tests 
for view pagination. return [ { "schema_name": "TEST_SCHEMA", diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py index 9760ea1a9c72b..3a37382de65b4 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py @@ -144,7 +144,7 @@ def test_snowflake_no_tables_causes_pipeline_failure( ) sf_cursor.execute.side_effect = query_permission_response_override( no_tables_fn, - [SnowflakeQuery.show_views_for_schema("TEST_SCHEMA", "TEST_DB")], + [SnowflakeQuery.show_views_for_database("TEST_DB")], [], )