diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 587833a41ba16..32f29c9b0950d 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -1029,6 +1029,7 @@ jobs: TEST_STRATEGY: ${{ matrix.test_strategy }} run: | echo "$DATAHUB_VERSION" + ./gradlew --stop ./smoke-test/smoke.sh - name: Disk Check run: df -h . && docker images @@ -1042,8 +1043,9 @@ jobs: uses: actions/upload-artifact@v3 if: failure() with: - name: docker logs + name: docker-logs-${{ matrix.test_strategy }} path: "docker_logs/*.log" + retention-days: 5 - name: Upload screenshots uses: actions/upload-artifact@v3 if: failure() diff --git a/.github/workflows/pr-labeler.yml b/.github/workflows/pr-labeler.yml index a3d1b85df0818..1ae3edae7aa90 100644 --- a/.github/workflows/pr-labeler.yml +++ b/.github/workflows/pr-labeler.yml @@ -61,18 +61,14 @@ jobs: contains( fromJson('[ "siladitya2", - "sgomezvillamor", - "ngamanda", - "HarveyLeo", - "frsann", "bossenti", - "nikolakasev", "PatrickfBraz", "cuong-pham", "sudhakarast", "tkdrahn", "rtekal", - "sgm44" + "mikeburke24", + "DSchmidtDev" ]'), github.actor ) diff --git a/build.gradle b/build.gradle index 07ca1f09e813c..a83d878f46965 100644 --- a/build.gradle +++ b/build.gradle @@ -34,8 +34,8 @@ buildscript { // Releases: https://github.com/linkedin/rest.li/blob/master/CHANGELOG.md ext.pegasusVersion = '29.57.0' ext.mavenVersion = '3.6.3' - ext.springVersion = '6.1.6' - ext.springBootVersion = '3.2.6' + ext.springVersion = '6.1.13' + ext.springBootVersion = '3.2.9' ext.springKafkaVersion = '3.1.6' ext.openTelemetryVersion = '1.18.0' ext.neo4jVersion = '5.14.0' @@ -222,10 +222,10 @@ project.ext.externalDependency = [ 'playServer': "com.typesafe.play:play-server_2.12:$playVersion", 'playTest': "com.typesafe.play:play-test_2.12:$playVersion", 'playFilters': "com.typesafe.play:filters-helpers_2.12:$playVersion", - 'pac4j': 'org.pac4j:pac4j-oidc:4.5.7', + 'pac4j': 
'org.pac4j:pac4j-oidc:4.5.8', 'playPac4j': 'org.pac4j:play-pac4j_2.12:9.0.2', 'postgresql': 'org.postgresql:postgresql:42.3.9', - 'protobuf': 'com.google.protobuf:protobuf-java:3.19.6', + 'protobuf': 'com.google.protobuf:protobuf-java:3.25.5', 'grpcProtobuf': 'io.grpc:grpc-protobuf:1.53.0', 'rangerCommons': 'org.apache.ranger:ranger-plugins-common:2.3.0', 'reflections': 'org.reflections:reflections:0.9.9', @@ -267,7 +267,7 @@ project.ext.externalDependency = [ 'testContainersOpenSearch': 'org.opensearch:opensearch-testcontainers:2.0.0', 'typesafeConfig':'com.typesafe:config:1.4.1', 'wiremock':'com.github.tomakehurst:wiremock:2.10.0', - 'zookeeper': 'org.apache.zookeeper:zookeeper:3.7.2', + 'zookeeper': 'org.apache.zookeeper:zookeeper:3.6.2', 'wire': 'com.squareup.wire:wire-compiler:3.7.1', 'charle': 'com.charleskorn.kaml:kaml:0.53.0', 'common': 'commons-io:commons-io:2.7', diff --git a/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java b/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java index 080ca236630bf..f982944071498 100644 --- a/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java +++ b/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java @@ -243,6 +243,9 @@ public Builder from(final com.typesafe.config.Config configs, final String ssoSe Optional.ofNullable(getOptional(configs, OIDC_PREFERRED_JWS_ALGORITHM, null)); } + grantType = Optional.ofNullable(getOptional(configs, OIDC_GRANT_TYPE, null)); + acrValues = Optional.ofNullable(getOptional(configs, OIDC_ACR_VALUES, null)); + return this; } diff --git a/datahub-frontend/app/auth/sso/oidc/custom/CustomOidcClient.java b/datahub-frontend/app/auth/sso/oidc/custom/CustomOidcClient.java index 67ec5d78add83..3a0a247cb761e 100644 --- a/datahub-frontend/app/auth/sso/oidc/custom/CustomOidcClient.java +++ b/datahub-frontend/app/auth/sso/oidc/custom/CustomOidcClient.java @@ -18,7 +18,7 @@ public CustomOidcClient(final OidcConfiguration configuration) { protected void clientInit() { 
CommonHelper.assertNotNull("configuration", getConfiguration()); getConfiguration().init(); - defaultRedirectionActionBuilder(new OidcRedirectionActionBuilder(getConfiguration(), this)); + defaultRedirectionActionBuilder(new CustomOidcRedirectionActionBuilder(getConfiguration(), this)); defaultCredentialsExtractor(new OidcExtractor(getConfiguration(), this)); defaultAuthenticator(new CustomOidcAuthenticator(this)); defaultProfileCreator(new OidcProfileCreator<>(getConfiguration(), this)); diff --git a/datahub-frontend/app/auth/sso/oidc/custom/CustomOidcRedirectionActionBuilder.java b/datahub-frontend/app/auth/sso/oidc/custom/CustomOidcRedirectionActionBuilder.java new file mode 100644 index 0000000000000..bdeeacc895af3 --- /dev/null +++ b/datahub-frontend/app/auth/sso/oidc/custom/CustomOidcRedirectionActionBuilder.java @@ -0,0 +1,47 @@ +package auth.sso.oidc.custom; + +import java.util.Map; +import java.util.Optional; +import org.pac4j.core.context.WebContext; +import org.pac4j.core.exception.http.RedirectionAction; +import org.pac4j.core.exception.http.RedirectionActionHelper; +import org.pac4j.oidc.client.OidcClient; +import org.pac4j.oidc.config.OidcConfiguration; +import org.pac4j.oidc.redirect.OidcRedirectionActionBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** Builds the OIDC authentication redirect and appends the configured acr_values custom param to the request URL when the URL does not already contain it. */ +public class CustomOidcRedirectionActionBuilder extends OidcRedirectionActionBuilder { + + private static final Logger logger = LoggerFactory.getLogger(CustomOidcRedirectionActionBuilder.class); + public CustomOidcRedirectionActionBuilder(OidcConfiguration configuration, OidcClient client) { + super(configuration, client); + } + + @Override + public Optional getRedirectionAction(WebContext context) { + Map params = this.buildParams(); + String computedCallbackUrl = this.client.computeFinalCallbackUrl(context); + params.put("redirect_uri", computedCallbackUrl); + this.addStateAndNonceParameters(context, params); + if (this.configuration.getMaxAge() != null) { + params.put("max_age", 
this.configuration.getMaxAge().toString()); + } + + String location = this.buildAuthenticationRequestUrl(params); + + logger.debug("Custom parameters: {}", this.configuration.getCustomParams()); + + String acrValues = this.configuration.getCustomParam("acr_values"); + + if (acrValues != null && !location.contains("acr_values=")) { + location += (location.contains("?") ? "&" : "?") + "acr_values=" + acrValues; + } + + logger.debug("Authentication request url: {}", location); + return Optional.of(RedirectionActionHelper.buildRedirectUrlAction(context, location)); + } + +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java index 72643ccac6325..9ba2778c285aa 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java @@ -78,7 +78,8 @@ public List getTimeseriesChart( Optional dimension, // Length 1 for now Map> filters, Map> mustNotFilters, - Optional uniqueOn) { + Optional uniqueOn, + String dateRangeField) { log.debug( String.format( @@ -87,11 +88,11 @@ public List getTimeseriesChart( + String.format("filters: %s, uniqueOn: %s", filters, uniqueOn)); AggregationBuilder filteredAgg = - getFilteredAggregation(filters, mustNotFilters, Optional.of(dateRange)); + getFilteredAggregation(filters, mustNotFilters, Optional.of(dateRange), dateRangeField); AggregationBuilder dateHistogram = AggregationBuilders.dateHistogram(DATE_HISTOGRAM) - .field("timestamp") + .field(dateRangeField) .calendarInterval(new DateHistogramInterval(granularity.name().toLowerCase())); uniqueOn.ifPresent(s -> dateHistogram.subAggregation(getUniqueQuery(s))); @@ -128,6 +129,25 @@ public List getTimeseriesChart( } } + public List getTimeseriesChart( + String indexName, + 
DateRange dateRange, + DateInterval granularity, + Optional dimension, // Length 1 for now + Map> filters, + Map> mustNotFilters, + Optional uniqueOn) { + return getTimeseriesChart( + indexName, + dateRange, + granularity, + dimension, + filters, + mustNotFilters, + uniqueOn, + "timestamp"); + } + private int extractCount(MultiBucketsAggregation.Bucket bucket, boolean didUnique) { return didUnique ? (int) bucket.getAggregations().get(UNIQUE).getValue() @@ -323,20 +343,38 @@ private Filter executeAndExtract(SearchRequest searchRequest) { } } + // Builds the filter aggregation; the date range is applied to the caller-supplied dateRangeField private AggregationBuilder getFilteredAggregation( Map> mustFilters, Map> mustNotFilters, - Optional dateRange) { + Optional dateRange, + String dateRangeField) { BoolQueryBuilder filteredQuery = QueryBuilders.boolQuery(); mustFilters.forEach((key, values) -> filteredQuery.must(QueryBuilders.termsQuery(key, values))); mustNotFilters.forEach( (key, values) -> filteredQuery.mustNot(QueryBuilders.termsQuery(key, values))); - dateRange.ifPresent(range -> filteredQuery.must(dateRangeQuery(range))); + dateRange.ifPresent(range -> filteredQuery.must(dateRangeQuery(range, dateRangeField))); return AggregationBuilders.filter(FILTERED, filteredQuery); } + private AggregationBuilder getFilteredAggregation( + Map> mustFilters, + Map> mustNotFilters, + Optional dateRange) { + // Use timestamp as dateRangeField + return getFilteredAggregation(mustFilters, mustNotFilters, dateRange, "timestamp"); + } + private QueryBuilder dateRangeQuery(DateRange dateRange) { - return QueryBuilders.rangeQuery("timestamp").gte(dateRange.getStart()).lt(dateRange.getEnd()); + // Use timestamp as dateRangeField + return dateRangeQuery(dateRange, "timestamp"); + } + + // Range query (start inclusive, end exclusive) over the caller-supplied dateRangeField + private QueryBuilder dateRangeQuery(DateRange dateRange, String dateRangeField) { + return QueryBuilders.rangeQuery(dateRangeField) + .gte(dateRange.getStart()) + .lt(dateRange.getEnd()); } private AggregationBuilder 
getUniqueQuery(String uniqueOn) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java index 2c058eb60a7ee..fff1dfee7ef9c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java @@ -14,6 +14,7 @@ import com.linkedin.datahub.graphql.generated.ScrollAcrossLineageResults; import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.datahub.graphql.types.common.mappers.LineageFlagsInputMapper; +import com.linkedin.datahub.graphql.types.common.mappers.SearchFlagsInputMapper; import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; import com.linkedin.datahub.graphql.types.mappers.UrnScrollAcrossLineageResultsMapper; import com.linkedin.entity.client.EntityClient; @@ -89,7 +90,6 @@ public CompletableFuture get(DataFetchingEnvironment if (lineageFlags.getEndTimeMillis() == null && endTimeMillis != null) { lineageFlags.setEndTimeMillis(endTimeMillis); } - ; com.linkedin.metadata.graph.LineageDirection resolvedDirection = com.linkedin.metadata.graph.LineageDirection.valueOf(lineageDirection.toString()); @@ -107,17 +107,13 @@ public CompletableFuture get(DataFetchingEnvironment count); final SearchFlags searchFlags; - final com.linkedin.datahub.graphql.generated.SearchFlags inputFlags = - input.getSearchFlags(); + com.linkedin.datahub.graphql.generated.SearchFlags inputFlags = input.getSearchFlags(); if (inputFlags != null) { - searchFlags = - new SearchFlags() - .setSkipCache(inputFlags.getSkipCache()) - .setFulltext(inputFlags.getFulltext()) - .setMaxAggValues(inputFlags.getMaxAggValues()); + searchFlags = SearchFlagsInputMapper.INSTANCE.apply(context, inputFlags); 
} else { searchFlags = null; } + return UrnScrollAcrossLineageResultsMapper.map( context, _entityClient.scrollAcrossLineage( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpdateStructuredPropertyResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpdateStructuredPropertyResolver.java index c432281ec1684..f5c101ba2bf64 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpdateStructuredPropertyResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpdateStructuredPropertyResolver.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.structuredproperties; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; import com.linkedin.common.urn.Urn; @@ -21,17 +22,21 @@ import com.linkedin.structured.PrimitivePropertyValue; import com.linkedin.structured.PropertyCardinality; import com.linkedin.structured.PropertyValue; +import com.linkedin.structured.StructuredPropertyDefinition; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import java.util.Objects; import java.util.concurrent.CompletableFuture; import javax.annotation.Nonnull; +import javax.annotation.Nullable; public class UpdateStructuredPropertyResolver implements DataFetcher> { private final EntityClient _entityClient; + private static final String ALLOWED_TYPES = "allowedTypes"; + public UpdateStructuredPropertyResolver(@Nonnull final EntityClient entityClient) { _entityClient = Objects.requireNonNull(entityClient, "entityClient must not be null"); } @@ -52,6 +57,8 @@ public CompletableFuture get(final DataFetchingEnviron "Unable to update 
structured property. Please contact your admin."); } final Urn propertyUrn = UrnUtils.getUrn(input.getUrn()); + StructuredPropertyDefinition existingDefinition = + getExistingStructuredProperty(context, propertyUrn); StructuredPropertyDefinitionPatchBuilder builder = new StructuredPropertyDefinitionPatchBuilder().urn(propertyUrn); @@ -65,7 +72,7 @@ public CompletableFuture get(final DataFetchingEnviron builder.setImmutable(input.getImmutable()); } if (input.getTypeQualifier() != null) { - buildTypeQualifier(input, builder); + buildTypeQualifier(input, builder, existingDefinition); } if (input.getNewAllowedValues() != null) { buildAllowedValues(input, builder); @@ -97,10 +104,16 @@ public CompletableFuture get(final DataFetchingEnviron private void buildTypeQualifier( @Nonnull final UpdateStructuredPropertyInput input, - @Nonnull final StructuredPropertyDefinitionPatchBuilder builder) { + @Nonnull final StructuredPropertyDefinitionPatchBuilder builder, + @Nullable final StructuredPropertyDefinition existingDefinition) { if (input.getTypeQualifier().getNewAllowedTypes() != null) { final StringArrayMap typeQualifier = new StringArrayMap(); StringArray allowedTypes = new StringArray(); + if (existingDefinition != null + && existingDefinition.getTypeQualifier() != null + && existingDefinition.getTypeQualifier().get(ALLOWED_TYPES) != null) { + allowedTypes.addAll(existingDefinition.getTypeQualifier().get(ALLOWED_TYPES)); + } allowedTypes.addAll(input.getTypeQualifier().getNewAllowedTypes()); typeQualifier.put("allowedTypes", allowedTypes); builder.setTypeQualifier(typeQualifier); @@ -127,4 +140,18 @@ private void buildAllowedValues( builder.addAllowedValue(value); }); } + + private StructuredPropertyDefinition getExistingStructuredProperty( + @Nonnull final QueryContext context, @Nonnull final Urn propertyUrn) throws Exception { + EntityResponse response = + _entityClient.getV2( + context.getOperationContext(), STRUCTURED_PROPERTY_ENTITY_NAME, propertyUrn, null); + + if 
(response != null + && response.getAspects().containsKey(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) { + return new StructuredPropertyDefinition( + response.getAspects().get(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME).getValue().data()); + } + return null; + } } diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index a2e2fe9163f53..fd112c9524ac9 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -9157,6 +9157,10 @@ enum PolicyMatchCondition { Whether the field matches the value """ EQUALS + """ + Whether the field value starts with the value + """ + STARTS_WITH } """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolverTest.java new file mode 100644 index 0000000000000..a12f593253b53 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolverTest.java @@ -0,0 +1,155 @@ +package com.linkedin.datahub.graphql.resolvers.search; + +import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; +import static org.mockito.ArgumentMatchers.nullable; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyList; +import static org.mockito.Mockito.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import com.datahub.authentication.Authentication; +import com.linkedin.common.UrnArrayArray; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.datahub.graphql.QueryContext; 
+import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.LineageDirection; +import com.linkedin.datahub.graphql.generated.ScrollAcrossLineageInput; +import com.linkedin.datahub.graphql.generated.ScrollAcrossLineageResults; +import com.linkedin.datahub.graphql.generated.SearchAcrossLineageResult; +import com.linkedin.datahub.graphql.generated.SearchFlags; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.AggregationMetadataArray; +import com.linkedin.metadata.search.LineageScrollResult; +import com.linkedin.metadata.search.LineageSearchEntity; +import com.linkedin.metadata.search.LineageSearchEntityArray; +import com.linkedin.metadata.search.MatchedFieldArray; +import com.linkedin.metadata.search.SearchResultMetadata; +import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.context.OperationContext; +import java.io.InputStream; +import java.util.Collections; +import java.util.List; +import org.mockito.ArgumentCaptor; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +public class ScrollAcrossLineageResolverTest { + private static final String SOURCE_URN_STRING = + "urn:li:dataset:(urn:li:dataPlatform:foo,bar,PROD)"; + private static final String TARGET_URN_STRING = + "urn:li:dataset:(urn:li:dataPlatform:foo,baz,PROD)"; + private static final String QUERY = ""; + private static final int START = 0; + private static final int COUNT = 10; + private static final Long START_TIMESTAMP_MILLIS = 0L; + private static final Long END_TIMESTAMP_MILLIS = 1000L; + private EntityClient _entityClient; + private DataFetchingEnvironment _dataFetchingEnvironment; + private Authentication _authentication; + private ScrollAcrossLineageResolver _resolver; + + @BeforeTest + 
public void disableAssert() { + PathSpecBasedSchemaAnnotationVisitor.class + .getClassLoader() + .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); + } + + @BeforeMethod + public void setupTest() { + _entityClient = mock(EntityClient.class); + _dataFetchingEnvironment = mock(DataFetchingEnvironment.class); + _authentication = mock(Authentication.class); + _resolver = new ScrollAcrossLineageResolver(_entityClient); + } + + @Test + public void testAllEntitiesInitialization() { + InputStream inputStream = ClassLoader.getSystemResourceAsStream("entity-registry.yml"); + EntityRegistry entityRegistry = new ConfigEntityRegistry(inputStream); + SearchAcrossLineageResolver resolver = + new SearchAcrossLineageResolver(_entityClient, entityRegistry); + assertTrue(resolver._allEntities.contains("dataset")); + assertTrue(resolver._allEntities.contains("dataFlow")); + // Test for case sensitivity + assertFalse(resolver._allEntities.contains("dataflow")); + } + + @Test + public void testSearchAcrossLineage() throws Exception { + final QueryContext mockContext = getMockAllowContext(); + when(mockContext.getAuthentication()).thenReturn(_authentication); + + when(_dataFetchingEnvironment.getContext()).thenReturn(mockContext); + + final SearchFlags searchFlags = new SearchFlags(); + searchFlags.setFulltext(true); + + final ScrollAcrossLineageInput input = new ScrollAcrossLineageInput(); + input.setCount(COUNT); + input.setDirection(LineageDirection.DOWNSTREAM); + input.setOrFilters(Collections.emptyList()); + input.setQuery(QUERY); + input.setTypes(Collections.emptyList()); + input.setStartTimeMillis(START_TIMESTAMP_MILLIS); + input.setEndTimeMillis(END_TIMESTAMP_MILLIS); + input.setUrn(SOURCE_URN_STRING); + input.setSearchFlags(searchFlags); + when(_dataFetchingEnvironment.getArgument(eq("input"))).thenReturn(input); + + final LineageScrollResult lineageSearchResult = new LineageScrollResult(); + lineageSearchResult.setNumEntities(1); + 
lineageSearchResult.setPageSize(10); + + final SearchResultMetadata searchResultMetadata = new SearchResultMetadata(); + searchResultMetadata.setAggregations(new AggregationMetadataArray()); + lineageSearchResult.setMetadata(searchResultMetadata); + + final LineageSearchEntity lineageSearchEntity = new LineageSearchEntity(); + lineageSearchEntity.setEntity(UrnUtils.getUrn(TARGET_URN_STRING)); + lineageSearchEntity.setScore(15.0); + lineageSearchEntity.setDegree(1); + lineageSearchEntity.setMatchedFields(new MatchedFieldArray()); + lineageSearchEntity.setPaths(new UrnArrayArray()); + lineageSearchResult.setEntities(new LineageSearchEntityArray(lineageSearchEntity)); + ArgumentCaptor opContext = ArgumentCaptor.forClass(OperationContext.class); + + when(_entityClient.scrollAcrossLineage( + opContext.capture(), + eq(UrnUtils.getUrn(SOURCE_URN_STRING)), + eq(com.linkedin.metadata.graph.LineageDirection.DOWNSTREAM), + anyList(), + eq(QUERY), + eq(null), + any(), + eq(null), + nullable(String.class), + nullable(String.class), + eq(COUNT))) + .thenReturn(lineageSearchResult); + + final ScrollAcrossLineageResults results = _resolver.get(_dataFetchingEnvironment).join(); + assertEquals(results.getCount(), 10); + assertEquals(results.getTotal(), 1); + assertEquals( + opContext.getValue().getSearchContext().getLineageFlags().getStartTimeMillis(), + START_TIMESTAMP_MILLIS); + assertEquals( + opContext.getValue().getSearchContext().getLineageFlags().getEndTimeMillis(), + END_TIMESTAMP_MILLIS); + + final List entities = results.getSearchResults(); + assertEquals(entities.size(), 1); + final SearchAcrossLineageResult entity = entities.get(0); + assertEquals(entity.getEntity().getUrn(), TARGET_URN_STRING); + assertEquals(entity.getEntity().getType(), EntityType.DATASET); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpdateStructuredPropertyResolverTest.java 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpdateStructuredPropertyResolverTest.java index 971a53de9473b..b818bcfb7d7f4 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpdateStructuredPropertyResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpdateStructuredPropertyResolverTest.java @@ -89,8 +89,8 @@ public void testGetFailure() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - // Validate that ingest was called, but that caused a failure - Mockito.verify(mockEntityClient, Mockito.times(1)) + // Validate that ingest was not called since there was a get failure before ingesting + Mockito.verify(mockEntityClient, Mockito.times(0)) .ingestProposal(any(), any(MetadataChangeProposal.class), Mockito.eq(false)); } diff --git a/datahub-upgrade/build.gradle b/datahub-upgrade/build.gradle index 5d814dd876679..e808f9e87687c 100644 --- a/datahub-upgrade/build.gradle +++ b/datahub-upgrade/build.gradle @@ -121,6 +121,34 @@ task run(type: Exec) { "-Dserver.port=8083", bootJar.getArchiveFile().get(), "-u", "SystemUpdate" } +/** + * Runs RestoreIndices on locally running system. The batchSize are set to + * test the process with pagination and not designed for optimal performance. + */ +task runRestoreIndices(type: Exec) { + dependsOn bootJar + group = "Execution" + description = "Run the restore indices process locally." 
+ environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml" + commandLine "java", "-agentlib:jdwp=transport=dt_socket,address=5003,server=y,suspend=n", + "-jar", + "-Dkafka.schemaRegistry.url=http://localhost:8080/schema-registry/api", + "-Dserver.port=8083", + bootJar.getArchiveFile().get(), "-u", "RestoreIndices", "-a", "batchSize=100" +} + +task runRestoreIndicesUrn(type: Exec) { + dependsOn bootJar + group = "Execution" + description = "Run the restore indices process locally using urn-based pagination." + environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml" + commandLine "java", "-agentlib:jdwp=transport=dt_socket,address=5003,server=y,suspend=n", + "-jar", + "-Dkafka.schemaRegistry.url=http://localhost:8080/schema-registry/api", + "-Dserver.port=8083", + bootJar.getArchiveFile().get(), "-u", "RestoreIndices", "-a", "batchSize=100", "-a", "urnBasedPagination=true" +} + docker { name "${docker_registry}/${docker_repo}:v${version}" version "v${version}" diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java index dea98c5cbcb13..8b33e4e7c2164 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java @@ -12,12 +12,13 @@ import com.linkedin.gms.factory.kafka.common.TopicConventionFactory; import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; -import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.config.kafka.KafkaConfiguration; import com.linkedin.metadata.dao.producer.KafkaEventProducer; import com.linkedin.metadata.dao.producer.KafkaHealthChecker; import 
com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceAspectRetriever; +import com.linkedin.metadata.graph.GraphService; +import com.linkedin.metadata.graph.SystemGraphRetriever; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.SearchServiceSearchRetriever; @@ -145,7 +146,7 @@ protected OperationContext javaSystemOperationContext( @Nonnull final EntityRegistry entityRegistry, @Nonnull final EntityService entityService, @Nonnull final RestrictedService restrictedService, - @Nonnull final GraphRetriever graphRetriever, + @Nonnull final GraphService graphService, @Nonnull final SearchService searchService, @Qualifier("baseElasticSearchComponents") BaseElasticSearchComponentsFactory.BaseElasticSearchComponents components) { @@ -159,6 +160,9 @@ protected OperationContext javaSystemOperationContext( SearchServiceSearchRetriever searchServiceSearchRetriever = SearchServiceSearchRetriever.builder().searchService(searchService).build(); + SystemGraphRetriever systemGraphRetriever = + SystemGraphRetriever.builder().graphService(graphService).build(); + OperationContext systemOperationContext = OperationContext.asSystem( operationContextConfig, @@ -168,11 +172,12 @@ protected OperationContext javaSystemOperationContext( components.getIndexConvention(), RetrieverContext.builder() .aspectRetriever(entityServiceAspectRetriever) - .graphRetriever(graphRetriever) + .graphRetriever(systemGraphRetriever) .searchRetriever(searchServiceSearchRetriever) .build()); entityServiceAspectRetriever.setSystemOperationContext(systemOperationContext); + systemGraphRetriever.setSystemOperationContext(systemOperationContext); searchServiceSearchRetriever.setSystemOperationContext(systemOperationContext); return systemOperationContext; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java 
b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexDataJobViaNodesCLLConfig.java similarity index 85% rename from datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java rename to datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexDataJobViaNodesCLLConfig.java index 4956254062ff9..a973876c6715f 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexDataJobViaNodesCLLConfig.java @@ -1,7 +1,8 @@ -package com.linkedin.datahub.upgrade.config; +package com.linkedin.datahub.upgrade.config.graph; +import com.linkedin.datahub.upgrade.config.SystemUpdateCondition; import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; -import com.linkedin.datahub.upgrade.system.vianodes.ReindexDataJobViaNodesCLL; +import com.linkedin.datahub.upgrade.system.graph.vianodes.ReindexDataJobViaNodesCLL; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.EntityService; import io.datahubproject.metadata.context.OperationContext; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexEdgeStatusConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexEdgeStatusConfig.java new file mode 100644 index 0000000000000..97715573eb51f --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexEdgeStatusConfig.java @@ -0,0 +1,31 @@ +package com.linkedin.datahub.upgrade.config.graph; + +import com.linkedin.datahub.upgrade.config.SystemUpdateCondition; +import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; +import com.linkedin.datahub.upgrade.system.graph.edgestatus.ReindexEdgeStatus; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import 
io.datahubproject.metadata.context.OperationContext; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Conditional; +import org.springframework.context.annotation.Configuration; + +@Configuration +@Conditional(SystemUpdateCondition.NonBlockingSystemUpdateCondition.class) +public class ReindexEdgeStatusConfig { + + @Bean + public NonBlockingSystemUpgrade reindexEdgeStatus( + final OperationContext opContext, + final EntityService entityService, + final AspectDao aspectDao, + @Value("${elasticsearch.search.graph.graphStatusEnabled}") final boolean featureEnabled, + @Value("${systemUpdate.edgeStatus.enabled}") final boolean enabled, + @Value("${systemUpdate.edgeStatus.batchSize}") final Integer batchSize, + @Value("${systemUpdate.edgeStatus.delayMs}") final Integer delayMs, + @Value("${systemUpdate.edgeStatus.limit}") final Integer limit) { + return new ReindexEdgeStatus( + opContext, entityService, aspectDao, featureEnabled && enabled, batchSize, delayMs, limit); + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java index 8e62db444a565..902a80ec107fd 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java @@ -16,6 +16,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.NoSuchElementException; import java.util.Optional; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; @@ -189,7 +190,12 @@ public Function executable() { context.report().addLine(String.format("Rows processed this loop %d", rowsProcessed)); start += args.batchSize; } catch (InterruptedException | ExecutionException e) { - 
return new DefaultUpgradeStepResult(id(), DataHubUpgradeState.FAILED); + if (e.getCause() instanceof NoSuchElementException) { + context.report().addLine("End of data."); + break; + } else { + return new DefaultUpgradeStepResult(id(), DataHubUpgradeState.FAILED); + } } } } else { diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexEdgeStatus.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexEdgeStatus.java new file mode 100644 index 0000000000000..6b7286a6a0639 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexEdgeStatus.java @@ -0,0 +1,50 @@ +package com.linkedin.datahub.upgrade.system.graph.edgestatus; + +import com.google.common.collect.ImmutableList; +import com.linkedin.datahub.upgrade.UpgradeStep; +import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; + +/** + * A job that reindexes all status aspects as part of the graph edges containing status information. + * This is required to make sure previously written status information is present in the graph + * index. 
+ */ +@Slf4j +public class ReindexEdgeStatus implements NonBlockingSystemUpgrade { + + private final List _steps; + + public ReindexEdgeStatus( + @Nonnull OperationContext opContext, + EntityService entityService, + AspectDao aspectDao, + boolean enabled, + Integer batchSize, + Integer batchDelayMs, + Integer limit) { + if (enabled) { + _steps = + ImmutableList.of( + new ReindexReindexEdgeStatusStep( + opContext, entityService, aspectDao, batchSize, batchDelayMs, limit)); + } else { + _steps = ImmutableList.of(); + } + } + + @Override + public String id() { + return this.getClass().getName(); + } + + @Override + public List steps() { + return _steps; + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexReindexEdgeStatusStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexReindexEdgeStatusStep.java new file mode 100644 index 0000000000000..6543f82e74563 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexReindexEdgeStatusStep.java @@ -0,0 +1,56 @@ +package com.linkedin.datahub.upgrade.system.graph.edgestatus; + +import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; + +import com.linkedin.datahub.upgrade.UpgradeContext; +import com.linkedin.datahub.upgrade.system.AbstractMCLStep; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.jetbrains.annotations.Nullable; + +@Slf4j +public class ReindexReindexEdgeStatusStep extends AbstractMCLStep { + + public ReindexReindexEdgeStatusStep( + OperationContext opContext, + EntityService entityService, + AspectDao aspectDao, + Integer batchSize, + Integer batchDelayMs, + Integer limit) { + super(opContext, entityService, aspectDao, batchSize, batchDelayMs, limit); + } + 
+ @Override + public String id() { + return "edge-status-reindex-v1"; + } + + @Nonnull + @Override + protected String getAspectName() { + return STATUS_ASPECT_NAME; + } + + @Nullable + @Override + protected String getUrnLike() { + return null; + } + + /** + * Returns whether the upgrade should be skipped. Uses previous run history or the environment + * variable SKIP_REINDEX_EDGE_STATUS to determine whether to skip. + */ + @Override + public boolean skip(UpgradeContext context) { + boolean envFlagRecommendsSkip = Boolean.parseBoolean(System.getenv("SKIP_REINDEX_EDGE_STATUS")); + if (envFlagRecommendsSkip) { + log.info("Environment variable SKIP_REINDEX_EDGE_STATUS is set to true. Skipping."); + } + return (super.skip(context) || envFlagRecommendsSkip); + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLL.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLL.java similarity index 95% rename from datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLL.java rename to datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLL.java index fc0b44f57ab49..7a4ca9586f155 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLL.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLL.java @@ -1,4 +1,4 @@ -package com.linkedin.datahub.upgrade.system.vianodes; +package com.linkedin.datahub.upgrade.system.graph.vianodes; import com.google.common.collect.ImmutableList; import com.linkedin.datahub.upgrade.UpgradeStep; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLLStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLLStep.java similarity index
96% rename from datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLLStep.java rename to datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLLStep.java index cf580670ee3a9..e3e07f99bb1ee 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLLStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLLStep.java @@ -1,4 +1,4 @@ -package com.linkedin.datahub.upgrade.system.vianodes; +package com.linkedin.datahub.upgrade.system.graph.vianodes; import static com.linkedin.metadata.Constants.*; diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java index 55a52f072a0ca..df27d33f3a117 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java @@ -9,7 +9,7 @@ import com.linkedin.datahub.upgrade.impl.DefaultUpgradeManager; import com.linkedin.datahub.upgrade.system.SystemUpdateNonBlocking; -import com.linkedin.datahub.upgrade.system.vianodes.ReindexDataJobViaNodesCLL; +import com.linkedin.datahub.upgrade.system.graph.vianodes.ReindexDataJobViaNodesCLL; import com.linkedin.metadata.boot.kafka.MockSystemUpdateDeserializer; import com.linkedin.metadata.boot.kafka.MockSystemUpdateSerializer; import com.linkedin.metadata.config.kafka.KafkaConfiguration; diff --git a/datahub-web-react/package.json b/datahub-web-react/package.json index a1b3a94ace640..dcaef6004d702 100644 --- a/datahub-web-react/package.json +++ b/datahub-web-react/package.json @@ -45,7 +45,7 @@ "dayjs": "^1.11.7", "deepmerge": "^4.2.2", "diff": "^5.0.0", - "dompurify": "^2.3.8", + "dompurify": "^2.5.4", 
"dotenv": "^8.2.0", "faker": "5.5.3", "graphql": "^15.5.0", diff --git a/datahub-web-react/src/app/entity/group/AddGroupMembersModal.tsx b/datahub-web-react/src/app/entity/group/AddGroupMembersModal.tsx index 2485fb1e48fbe..7666eb04612e5 100644 --- a/datahub-web-react/src/app/entity/group/AddGroupMembersModal.tsx +++ b/datahub-web-react/src/app/entity/group/AddGroupMembersModal.tsx @@ -17,7 +17,7 @@ type Props = { const SelectInput = styled(Select)` > .ant-select-selector { - height: 36px; + height: auto; } `; diff --git a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchResults.tsx b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchResults.tsx index 26c90edd82b69..0749ff369c125 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchResults.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchResults.tsx @@ -10,6 +10,8 @@ import { SearchFiltersSection } from '../../../../../search/SearchFiltersSection import { EntitySearchResults, EntityActionProps } from './EntitySearchResults'; import MatchingViewsLabel from './MatchingViewsLabel'; import { ANTD_GRAY } from '../../../constants'; +import { useIsShowSeparateSiblingsEnabled } from '../../../../../useAppConfig'; +import { combineSiblingsInSearchResults } from '../../../../../search/utils/combineSiblingsInSearchResults'; const SearchBody = styled.div` height: 100%; @@ -129,6 +131,12 @@ export const EmbeddedListSearchResults = ({ onLineageClick, isLineageTab = false, }: Props) => { + const showSeparateSiblings = useIsShowSeparateSiblingsEnabled(); + const combinedSiblingSearchResults = combineSiblingsInSearchResults( + showSeparateSiblings, + searchResponse?.searchResults, + ); + const pageStart = searchResponse?.start || 0; const pageSize = searchResponse?.count || 0; const totalResults = searchResponse?.total || 0; @@ -169,9 +177,9 @@ export const
EmbeddedListSearchResults = ({ )} {!loading && !isServerOverloadError && ( ({ + combinedSiblingSearchResults?.map((searchResult) => ({ // when we add impact analysis, we will want to pipe the path to each element to the result this // eslint-disable-next-line @typescript-eslint/dot-notation degree: searchResult['degree'], diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/contract/utils.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/contract/utils.tsx index cc2e1bb7b386e..537750ec279f9 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/contract/utils.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/contract/utils.tsx @@ -111,4 +111,5 @@ export const DATA_QUALITY_ASSERTION_TYPES = new Set([ AssertionType.Sql, AssertionType.Field, AssertionType.Dataset, + AssertionType.Custom, ]); diff --git a/datahub-web-react/src/app/lineage/LineageEntityEdge.tsx b/datahub-web-react/src/app/lineage/LineageEntityEdge.tsx index a0a7db63381df..485c00c7984d8 100644 --- a/datahub-web-react/src/app/lineage/LineageEntityEdge.tsx +++ b/datahub-web-react/src/app/lineage/LineageEntityEdge.tsx @@ -26,11 +26,11 @@ const StyledEyeOutlined = styled(EyeOutlined)` type Props = { edge: VizEdge; - key: string; + edgeKey: string; isHighlighted: boolean; }; -export default function LineageEntityEdge({ edge, key, isHighlighted }: Props) { +export default function LineageEntityEdge({ edge, edgeKey, isHighlighted }: Props) { const createdOnTimestamp = edge?.createdOn; const updatedOnTimestamp = edge?.updatedOn; const createdOn = createdOnTimestamp ? 
dayjs(createdOnTimestamp).format('ll') : undefined; @@ -59,7 +59,7 @@ export default function LineageEntityEdge({ edge, key, isHighlighted }: Props) { undefined } > - + { diff --git a/datahub-web-react/src/app/lineage/LineageExplorer.tsx b/datahub-web-react/src/app/lineage/LineageExplorer.tsx index 6d5815afe7d30..2d42d164f6626 100644 --- a/datahub-web-react/src/app/lineage/LineageExplorer.tsx +++ b/datahub-web-react/src/app/lineage/LineageExplorer.tsx @@ -77,12 +77,12 @@ export default function LineageExplorer({ urn, type }: Props) { const [isDrawerVisible, setIsDrawVisible] = useState(false); const [selectedEntity, setSelectedEntity] = useState(undefined); - const [asyncEntities, setAsyncEntities] = useState({}); + const [asyncEntities, setAsyncEntities] = useState(new Map()); // In the case that any URL params change, we want to reset asyncEntities. If new parameters are added, // they should be added to the dependency array below. useEffect(() => { - setAsyncEntities({}); + setAsyncEntities(new Map()); // this can also be our hook for emitting the tracking event analytics.event({ @@ -93,7 +93,7 @@ export default function LineageExplorer({ urn, type }: Props) { useEffect(() => { if (showColumns) { - setAsyncEntities({}); + setAsyncEntities(new Map()); } }, [showColumns]); @@ -101,7 +101,7 @@ export default function LineageExplorer({ urn, type }: Props) { const maybeAddAsyncLoadedEntity = useCallback( (entityAndType: EntityAndType) => { - if (entityAndType?.entity.urn && !asyncEntities[entityAndType?.entity.urn]?.fullyFetched) { + if (entityAndType?.entity.urn && !asyncEntities.get(entityAndType?.entity.urn)?.fullyFetched) { // record that we have added this entity let newAsyncEntities = extendAsyncEntities( fineGrainedMap, @@ -145,10 +145,10 @@ export default function LineageExplorer({ urn, type }: Props) { // set asyncEntity to have fullyFetched: false so we can update it in maybeAddAsyncLoadedEntity function resetAsyncEntity(entityUrn: string) { - 
setAsyncEntities({ - ...asyncEntities, - [entityUrn]: { ...asyncEntities[entityUrn], fullyFetched: false }, - }); + const newAsyncEntities = new Map(asyncEntities); + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + newAsyncEntities.set(entityUrn, { ...asyncEntities.get(entityUrn)!, fullyFetched: false }); + setAsyncEntities(newAsyncEntities); } const handleClose = () => { diff --git a/datahub-web-react/src/app/lineage/LineageTree.tsx b/datahub-web-react/src/app/lineage/LineageTree.tsx index 8b5de4e78ff17..46156baacd611 100644 --- a/datahub-web-react/src/app/lineage/LineageTree.tsx +++ b/datahub-web-react/src/app/lineage/LineageTree.tsx @@ -25,7 +25,7 @@ type LineageTreeProps = { setIsDraggingNode: (isDraggingNode: boolean) => void; draggedNodes: Record; setDraggedNodes: (draggedNodes: Record) => void; - fetchedEntities: { [x: string]: FetchedEntity }; + fetchedEntities: Map; setUpdatedLineages: React.Dispatch>; }; diff --git a/datahub-web-react/src/app/lineage/LineageTreeNodeAndEdgeRenderer.tsx b/datahub-web-react/src/app/lineage/LineageTreeNodeAndEdgeRenderer.tsx index bec83c80107b3..2e6fc2997d4ca 100644 --- a/datahub-web-react/src/app/lineage/LineageTreeNodeAndEdgeRenderer.tsx +++ b/datahub-web-react/src/app/lineage/LineageTreeNodeAndEdgeRenderer.tsx @@ -77,7 +77,7 @@ export default function LineageTreeNodeAndEdgeRenderer({ link.target.data.urn }${link.targetField && `-${link.targetField}`}-${link.target.direction}`; - return ; + return ; })} {nodesToRender.map((node, index) => { const isSelected = node.data.urn === selectedEntity?.urn; diff --git a/datahub-web-react/src/app/lineage/LineageVizInsideZoom.tsx b/datahub-web-react/src/app/lineage/LineageVizInsideZoom.tsx index ab86dfcb335f0..4d4ac317e06f5 100644 --- a/datahub-web-react/src/app/lineage/LineageVizInsideZoom.tsx +++ b/datahub-web-react/src/app/lineage/LineageVizInsideZoom.tsx @@ -36,7 +36,7 @@ const ZoomButton = styled(Button)` type Props = { margin: { top: number; right: number; 
bottom: number; left: number }; entityAndType?: EntityAndType | null; - fetchedEntities: { [x: string]: FetchedEntity }; + fetchedEntities: Map; onEntityClick: (EntitySelectParams) => void; onEntityCenter: (EntitySelectParams) => void; onLineageExpand: (data: EntityAndType) => void; diff --git a/datahub-web-react/src/app/lineage/LineageVizRootSvg.tsx b/datahub-web-react/src/app/lineage/LineageVizRootSvg.tsx index 434fb1562bc2a..8aa556ae12983 100644 --- a/datahub-web-react/src/app/lineage/LineageVizRootSvg.tsx +++ b/datahub-web-react/src/app/lineage/LineageVizRootSvg.tsx @@ -10,7 +10,7 @@ import constructTree from './utils/constructTree'; type Props = { margin: { top: number; right: number; bottom: number; left: number }; entityAndType?: EntityAndType | null; - fetchedEntities: { [x: string]: FetchedEntity }; + fetchedEntities: Map; onEntityClick: (EntitySelectParams) => void; onEntityCenter: (EntitySelectParams) => void; onLineageExpand: (data: EntityAndType) => void; diff --git a/datahub-web-react/src/app/lineage/__tests__/LineageTree.test.tsx b/datahub-web-react/src/app/lineage/__tests__/LineageTree.test.tsx index 0852a8a32dbfa..571ca51c46b09 100644 --- a/datahub-web-react/src/app/lineage/__tests__/LineageTree.test.tsx +++ b/datahub-web-react/src/app/lineage/__tests__/LineageTree.test.tsx @@ -9,7 +9,7 @@ import { dataset6WithLineage, mocks, } from '../../../Mocks'; -import { Direction, EntityAndType, FetchedEntities } from '../types'; +import { Direction, EntityAndType } from '../types'; import constructTree from '../utils/constructTree'; import LineageTree from '../LineageTree'; import extendAsyncEntities from '../utils/extendAsyncEntities'; @@ -50,7 +50,7 @@ describe('LineageTree', () => { { entity: entry.entity, type: EntityType.Dataset } as EntityAndType, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); const downstreamData = constructTree( diff --git a/datahub-web-react/src/app/lineage/__tests__/adjustVXTreeLayout.test.tsx 
b/datahub-web-react/src/app/lineage/__tests__/adjustVXTreeLayout.test.tsx index ffcc7433426a0..11cc69a142320 100644 --- a/datahub-web-react/src/app/lineage/__tests__/adjustVXTreeLayout.test.tsx +++ b/datahub-web-react/src/app/lineage/__tests__/adjustVXTreeLayout.test.tsx @@ -14,7 +14,7 @@ import { import constructTree from '../utils/constructTree'; import extendAsyncEntities from '../utils/extendAsyncEntities'; import adjustVXTreeLayout from '../utils/adjustVXTreeLayout'; -import { NodeData, Direction, FetchedEntities, EntityAndType } from '../types'; +import { NodeData, Direction, EntityAndType } from '../types'; import { getTestEntityRegistry } from '../../../utils/test-utils/TestPageContainer'; import { Dataset, Entity, EntityType } from '../../../types.generated'; @@ -37,7 +37,7 @@ describe('adjustVXTreeLayout', () => { { entity: entry.entity, type: EntityType.Dataset } as EntityAndType, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); const downstreamData = hierarchy( @@ -88,7 +88,7 @@ describe('adjustVXTreeLayout', () => { { entity: entry.entity, type: EntityType.Dataset } as EntityAndType, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); const upstreamData = hierarchy( @@ -144,7 +144,7 @@ describe('adjustVXTreeLayout', () => { { entity: entry.entity, type: EntityType.Dataset } as EntityAndType, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); const upstreamData = hierarchy( @@ -189,7 +189,7 @@ describe('adjustVXTreeLayout', () => { { entity: entry.entity as Dataset, type: EntityType.Dataset }, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); const upstreamData = hierarchy( @@ -234,7 +234,7 @@ describe('adjustVXTreeLayout', () => { { entity: entry.entity as Dataset, type: EntityType.Dataset }, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); const upstreamData = hierarchy( diff --git a/datahub-web-react/src/app/lineage/__tests__/constructTree.test.ts 
b/datahub-web-react/src/app/lineage/__tests__/constructTree.test.ts index 2e41fb9ea07bf..245265cd72296 100644 --- a/datahub-web-react/src/app/lineage/__tests__/constructTree.test.ts +++ b/datahub-web-react/src/app/lineage/__tests__/constructTree.test.ts @@ -12,7 +12,7 @@ import { } from '../../../Mocks'; import { DataPlatform, Dataset, Entity, EntityType, RelationshipDirection } from '../../../types.generated'; import { getTestEntityRegistry } from '../../../utils/test-utils/TestPageContainer'; -import { Direction, EntityAndType, FetchedEntities, UpdatedLineages } from '../types'; +import { Direction, EntityAndType, FetchedEntity, UpdatedLineages } from '../types'; import constructTree from '../utils/constructTree'; import extendAsyncEntities from '../utils/extendAsyncEntities'; @@ -23,7 +23,7 @@ const airflowPlatform: DataPlatform = dataFlow1.platform; describe('constructTree', () => { it('handles nodes without any lineage', () => { - const mockFetchedEntities = {}; + const mockFetchedEntities = new Map(); expect( constructTree( { entity: dataset3, type: EntityType.Dataset }, @@ -61,7 +61,7 @@ describe('constructTree', () => { { entity: entry.entity as Dataset, type: EntityType.Dataset }, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); expect( @@ -113,7 +113,7 @@ describe('constructTree', () => { { entity: entry.entity as Dataset, type: EntityType.Dataset }, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); expect( @@ -166,7 +166,7 @@ describe('constructTree', () => { { entity: entry.entity as Dataset, type: EntityType.Dataset }, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); expect( @@ -261,7 +261,7 @@ describe('constructTree', () => { { entity: entry.entity as Dataset, type: EntityType.Dataset }, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); const tree = constructTree( @@ -292,7 +292,7 @@ describe('constructTree', () => { { entity: entry.entity as Entity, type: EntityType.Dataset } as 
EntityAndType, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); expect( constructTree( @@ -379,7 +379,7 @@ describe('constructTree', () => { { entity: entry.entity as Dataset, type: entry.entity.type as EntityType } as EntityAndType, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); expect( constructTree( @@ -434,7 +434,7 @@ describe('constructTree', () => { { entity: entry.entity as Dataset, type: EntityType.Dataset }, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); const updatedLineages: UpdatedLineages = { diff --git a/datahub-web-react/src/app/lineage/types.ts b/datahub-web-react/src/app/lineage/types.ts index b4d73d0c9185e..0edc3a3a7c039 100644 --- a/datahub-web-react/src/app/lineage/types.ts +++ b/datahub-web-react/src/app/lineage/types.ts @@ -111,7 +111,7 @@ export type ColumnEdge = { targetField: string; }; -export type FetchedEntities = { [x: string]: FetchedEntity }; +export type FetchedEntities = Map; export enum Direction { Upstream = 'Upstream', @@ -126,7 +126,7 @@ export type LineageExplorerParams = { export type TreeProps = { margin?: { top: number; right: number; bottom: number; left: number }; entityAndType?: EntityAndType | null; - fetchedEntities: { [x: string]: FetchedEntity }; + fetchedEntities: Map; onEntityClick: (EntitySelectParams) => void; onEntityCenter: (EntitySelectParams) => void; onLineageExpand: (data: EntityAndType) => void; diff --git a/datahub-web-react/src/app/lineage/utils/__tests__/columnLineageUtils.test.tsx b/datahub-web-react/src/app/lineage/utils/__tests__/columnLineageUtils.test.tsx index c11d8fe90cfa9..235fce08a85dd 100644 --- a/datahub-web-react/src/app/lineage/utils/__tests__/columnLineageUtils.test.tsx +++ b/datahub-web-react/src/app/lineage/utils/__tests__/columnLineageUtils.test.tsx @@ -103,9 +103,7 @@ describe('getPopulatedColumnsByUrn', () => { }, ] as FineGrainedLineage[], }; - const fetchedEntities = { - [dataJobWithCLL.urn]: dataJobWithCLL as FetchedEntity, - }; + 
const fetchedEntities = new Map([[dataJobWithCLL.urn, dataJobWithCLL as FetchedEntity]]); const columnsByUrn = getPopulatedColumnsByUrn({}, fetchedEntities); expect(columnsByUrn).toMatchObject({ diff --git a/datahub-web-react/src/app/lineage/utils/__tests__/extendAsyncEntities.test.ts b/datahub-web-react/src/app/lineage/utils/__tests__/extendAsyncEntities.test.ts index ad28bccbbd85a..6af9b1b2c0d97 100644 --- a/datahub-web-react/src/app/lineage/utils/__tests__/extendAsyncEntities.test.ts +++ b/datahub-web-react/src/app/lineage/utils/__tests__/extendAsyncEntities.test.ts @@ -19,16 +19,20 @@ describe('extendColumnLineage', () => { }, ] as FineGrainedLineage[], }; - const fetchedEntities = { - [dataJobWithCLL.urn]: dataJobWithCLL as FetchedEntity, - }; + const fetchedEntities = new Map([[dataJobWithCLL.urn, dataJobWithCLL as FetchedEntity]]); const fineGrainedMap = { forward: {}, reverse: {} }; extendColumnLineage(dataJobWithCLL, fineGrainedMap, {}, fetchedEntities); expect(fineGrainedMap).toMatchObject({ forward: { - [dataJob1.urn]: { test1: { [dataset2.urn]: ['test2'] }, test3: { [dataset2.urn]: ['test4'] } }, - [dataset1.urn]: { test1: { [dataJob1.urn]: ['test1'] }, test3: { [dataJob1.urn]: ['test3'] } }, + [dataJob1.urn]: { + test1: { [dataset2.urn]: ['test2'] }, + test3: { [dataset2.urn]: ['test4'] }, + }, + [dataset1.urn]: { + test1: { [dataJob1.urn]: ['test1'] }, + test3: { [dataJob1.urn]: ['test3'] }, + }, }, reverse: { [dataJob1.urn]: { test1: { [dataset1.urn]: ['test1'] }, test3: { [dataset1.urn]: ['test3'] } }, diff --git a/datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts b/datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts index 60b1698444168..c3483f229f7a0 100644 --- a/datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts +++ b/datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts @@ -71,10 +71,10 @@ export function convertInputFieldsToSchemaFields(inputFields?: InputFields) { */ export function 
getPopulatedColumnsByUrn( columnsByUrn: Record, - fetchedEntities: { [x: string]: FetchedEntity }, + fetchedEntities: Map, ) { let populatedColumnsByUrn = { ...columnsByUrn }; - Object.entries(fetchedEntities).forEach(([urn, fetchedEntity]) => { + Array.from(fetchedEntities.entries()).forEach(([urn, fetchedEntity]) => { if (fetchedEntity.schemaMetadata && !columnsByUrn[urn]) { populatedColumnsByUrn = { ...populatedColumnsByUrn, @@ -122,7 +122,7 @@ export function getPopulatedColumnsByUrn( export function populateColumnsByUrn( columnsByUrn: Record, - fetchedEntities: { [x: string]: FetchedEntity }, + fetchedEntities: Map, setColumnsByUrn: (colsByUrn: Record) => void, ) { setColumnsByUrn(getPopulatedColumnsByUrn(columnsByUrn, fetchedEntities)); diff --git a/datahub-web-react/src/app/lineage/utils/constructFetchedNode.ts b/datahub-web-react/src/app/lineage/utils/constructFetchedNode.ts index 778d0e325f7cb..12d4cca352bb3 100644 --- a/datahub-web-react/src/app/lineage/utils/constructFetchedNode.ts +++ b/datahub-web-react/src/app/lineage/utils/constructFetchedNode.ts @@ -36,7 +36,7 @@ export default function constructFetchedNode( } const newConstructionPath = [...constructionPath, urn]; - const fetchedNode = fetchedEntities[urn]; + const fetchedNode = fetchedEntities.get(urn); if (constructedNodes[urn]) { return constructedNodes[urn]; @@ -53,7 +53,7 @@ export default function constructFetchedNode( subtype: fetchedNode.subtype, icon: fetchedNode.icon, unexploredChildren: - fetchedNode?.[childrenKey]?.filter((childUrn) => !(childUrn.entity.urn in fetchedEntities)).length || 0, + fetchedNode?.[childrenKey]?.filter((childUrn) => !fetchedEntities.has(childUrn.entity.urn)).length || 0, countercurrentChildrenUrns: fetchedNode?.[direction === Direction.Downstream ? 
'upstreamChildren' : 'downstreamChildren']?.map( (child) => child.entity.urn, @@ -88,7 +88,7 @@ export default function constructFetchedNode( ); }) ?.filter((child) => { - const childEntity = fetchedEntities[child?.urn || '']; + const childEntity = fetchedEntities.get(child?.urn || ''); const parentChildren = fetchedNode[childrenKey]; return shouldIncludeChildEntity(direction, parentChildren, childEntity, fetchedNode); }) diff --git a/datahub-web-react/src/app/lineage/utils/constructTree.ts b/datahub-web-react/src/app/lineage/utils/constructTree.ts index 7da6fc56b57bd..38a865ea9e093 100644 --- a/datahub-web-react/src/app/lineage/utils/constructTree.ts +++ b/datahub-web-react/src/app/lineage/utils/constructTree.ts @@ -62,15 +62,14 @@ export default function constructTree( const constructedNodes = {}; let updatedFetchedEntities = fetchedEntities; - Object.entries(updatedFetchedEntities).forEach((entry) => { - const [urn, fetchedEntity] = entry; + Array.from(updatedFetchedEntities.entries()).forEach(([urn, fetchedEntity]) => { if (urn in updatedLineages) { - updatedFetchedEntities[urn] = updateFetchedEntity(fetchedEntity, updatedLineages); + updatedFetchedEntities.set(urn, updateFetchedEntity(fetchedEntity, updatedLineages)); } }); Object.values(updatedLineages).forEach((updatedLineage) => { (updatedLineage as any).entitiesToAdd.forEach((entity) => { - if (!(entity.urn in updatedFetchedEntities)) { + if (!updatedFetchedEntities.has(entity.urn)) { updatedFetchedEntities = extendAsyncEntities( {}, {}, @@ -125,7 +124,7 @@ export default function constructTree( ]); }) ?.filter((child) => { - const childEntity = updatedFetchedEntities[child?.urn || '']; + const childEntity = updatedFetchedEntities.get(child?.urn || ''); return shouldIncludeChildEntity(direction, children, childEntity, fetchedEntity); }) ?.filter(Boolean) as Array; diff --git a/datahub-web-react/src/app/lineage/utils/extendAsyncEntities.ts b/datahub-web-react/src/app/lineage/utils/extendAsyncEntities.ts 
index 30e81a37dc380..7deca50b154c7 100644 --- a/datahub-web-react/src/app/lineage/utils/extendAsyncEntities.ts +++ b/datahub-web-react/src/app/lineage/utils/extendAsyncEntities.ts @@ -115,8 +115,8 @@ export function extendColumnLineage( // if this upstreamEntityUrn is a sibling of one of the already rendered nodes, // update the fine grained map with the rendered node instead of its sibling - Object.keys(fetchedEntities).forEach((urn) => { - fetchedEntities[urn].siblings?.siblings?.forEach((sibling) => { + Array.from(fetchedEntities.keys()).forEach((urn) => { + fetchedEntities.get(urn)?.siblings?.siblings?.forEach((sibling) => { if (sibling && sibling.urn === upstreamEntityUrn) { updateFineGrainedMap( fineGrainedMap, @@ -188,7 +188,7 @@ export default function extendAsyncEntities( entityAndType: EntityAndType, fullyFetched = false, ): FetchedEntities { - if (fetchedEntities[entityAndType.entity.urn]?.fullyFetched) { + if (fetchedEntities.get(entityAndType.entity.urn)?.fullyFetched) { return fetchedEntities; } @@ -198,11 +198,7 @@ export default function extendAsyncEntities( extendColumnLineage(lineageVizConfig, fineGrainedMap, fineGrainedMapForSiblings, fetchedEntities); - return { - ...fetchedEntities, - [entityAndType.entity.urn]: { - ...lineageVizConfig, - fullyFetched, - }, - }; + const newFetchedEntities = new Map(fetchedEntities); + newFetchedEntities.set(entityAndType.entity.urn, { ...lineageVizConfig, fullyFetched }); + return newFetchedEntities; } diff --git a/datahub-web-react/src/app/lineage/utils/useSortColumnsBySelectedField.ts b/datahub-web-react/src/app/lineage/utils/useSortColumnsBySelectedField.ts index dc0d3ea2f0376..d27764d6ed7b5 100644 --- a/datahub-web-react/src/app/lineage/utils/useSortColumnsBySelectedField.ts +++ b/datahub-web-react/src/app/lineage/utils/useSortColumnsBySelectedField.ts @@ -12,7 +12,7 @@ import { } from './columnLineageUtils'; import { LineageExplorerContext } from './LineageExplorerContext'; -export default function 
useSortColumnsBySelectedField(fetchedEntities: { [x: string]: FetchedEntity }) { +export default function useSortColumnsBySelectedField(fetchedEntities: Map) { const { highlightedEdges, selectedField, columnsByUrn, setColumnsByUrn } = useContext(LineageExplorerContext); const previousSelectedField = usePrevious(selectedField); @@ -37,15 +37,15 @@ export default function useSortColumnsBySelectedField(fetchedEntities: { [x: str setColumnsByUrn(updatedColumnsByUrn); } else if (!selectedField && previousSelectedField !== selectedField) { Object.entries(columnsByUrn).forEach(([urn, columns]) => { - const fetchedEntity = fetchedEntities[urn]; - if (fetchedEntity && fetchedEntity.schemaMetadata) { + const fetchedEntity = fetchedEntities.get(urn); + if (fetchedEntity?.schemaMetadata) { updatedColumnsByUrn = sortColumnsByDefault( updatedColumnsByUrn, columns, convertFieldsToV1FieldPath(fetchedEntity.schemaMetadata.fields), urn, ); - } else if (fetchedEntity && fetchedEntity.inputFields) { + } else if (fetchedEntity?.inputFields) { updatedColumnsByUrn = sortColumnsByDefault( updatedColumnsByUrn, columns, diff --git a/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx b/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx index 88a1388ba9589..37349585fa4c9 100644 --- a/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx +++ b/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx @@ -3,9 +3,14 @@ import { Link } from 'react-router-dom'; import { Button, Divider, Modal, Tag, Typography } from 'antd'; import styled from 'styled-components'; import { useEntityRegistry } from '../../useEntityRegistry'; -import { Maybe, Policy, PolicyState, PolicyType } from '../../../types.generated'; +import { Maybe, Policy, PolicyMatchCondition, PolicyState, PolicyType } from '../../../types.generated'; import { useAppConfig } from '../../useAppConfig'; -import { convertLegacyResourceFilter, getFieldValues, 
mapResourceTypeToDisplayName } from './policyUtils'; +import { + convertLegacyResourceFilter, + getFieldValues, + getFieldCondition, + mapResourceTypeToDisplayName, +} from './policyUtils'; import AvatarsGroup from '../AvatarsGroup'; type PrivilegeOptionType = { @@ -70,6 +75,7 @@ export default function PolicyDetailsModal({ policy, open, onClose, privileges } const resourceTypes = getFieldValues(resources?.filter, 'TYPE') || []; const dataPlatformInstances = getFieldValues(resources?.filter, 'DATA_PLATFORM_INSTANCE') || []; const resourceEntities = getFieldValues(resources?.filter, 'URN') || []; + const resourceFilterCondition = getFieldCondition(resources?.filter, 'URN') || PolicyMatchCondition.Equals; const domains = getFieldValues(resources?.filter, 'DOMAIN') || []; const { @@ -104,6 +110,10 @@ export default function PolicyDetailsModal({ policy, open, onClose, privileges } ); }; + const getWildcardUrnTag = (criterionValue) => { + return {criterionValue.value}*; + }; + const resourceOwnersField = (actors) => { if (!actors?.resourceOwners) { return No; @@ -166,7 +176,10 @@ export default function PolicyDetailsModal({ policy, open, onClose, privileges } return ( // eslint-disable-next-line react/no-array-index-key - {getEntityTag(value)} + {resourceFilterCondition && + resourceFilterCondition === PolicyMatchCondition.StartsWith + ? 
getWildcardUrnTag(value) + : getEntityTag(value)} ); })) || All} diff --git a/datahub-web-react/src/app/permissions/policy/policyUtils.ts b/datahub-web-react/src/app/permissions/policy/policyUtils.ts index 725e39d82d62e..b71a38f80fc25 100644 --- a/datahub-web-react/src/app/permissions/policy/policyUtils.ts +++ b/datahub-web-react/src/app/permissions/policy/policyUtils.ts @@ -118,6 +118,10 @@ export const getFieldValues = (filter: Maybe | undefined, res return filter?.criteria?.find((criterion) => criterion.field === resourceFieldType)?.values || []; }; +export const getFieldCondition = (filter: Maybe | undefined, resourceFieldType: string) => { + return filter?.criteria?.find((criterion) => criterion.field === resourceFieldType)?.condition || null; +}; + export const getFieldValuesOfTags = (filter: Maybe | undefined, resourceFieldType: string) => { return filter?.criteria?.find((criterion) => criterion.field === resourceFieldType)?.values || []; }; diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql index 1874239751723..17ad6f881b0ab 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -758,16 +758,6 @@ fragment schemaFieldFields on SchemaField { } } } - parent { - urn - type - ...entityDisplayNameFields - ... 
on Dataset { - platform { - ...platformFields - } - } - } } } diff --git a/datahub-web-react/yarn.lock b/datahub-web-react/yarn.lock index 558711251d490..8d5899d9891f1 100644 --- a/datahub-web-react/yarn.lock +++ b/datahub-web-react/yarn.lock @@ -5435,10 +5435,10 @@ domino@^2.1.6: resolved "https://registry.yarnpkg.com/domino/-/domino-2.1.6.tgz#fe4ace4310526e5e7b9d12c7de01b7f485a57ffe" integrity sha512-3VdM/SXBZX2omc9JF9nOPCtDaYQ67BGp5CoLpIQlO2KCAPETs8TcDHacF26jXadGbvUteZzRTeos2fhID5+ucQ== -dompurify@^2.3.8: - version "2.3.8" - resolved "https://registry.yarnpkg.com/dompurify/-/dompurify-2.3.8.tgz#224fe9ae57d7ebd9a1ae1ac18c1c1ca3f532226f" - integrity sha512-eVhaWoVibIzqdGYjwsBWodIQIaXFSB+cKDf4cfxLMsK0xiud6SE+/WCVx/Xw/UwQsa4cS3T2eITcdtmTg2UKcw== +dompurify@^2.5.4: + version "2.5.4" + resolved "https://registry.yarnpkg.com/dompurify/-/dompurify-2.5.4.tgz#347e91070963b22db31c7c8d0ce9a0a2c3c08746" + integrity sha512-l5NNozANzaLPPe0XaAwvg3uZcHtDBnziX/HjsY1UcDj1MxTK8Dd0Kv096jyPK5HRzs/XM5IMj20dW8Fk+HnbUA== dot-case@^3.0.4: version "3.0.4" @@ -5459,9 +5459,9 @@ dotenv@^8.2.0: integrity sha512-IrPdXQsk2BbzvCBGBOTmmSH5SodmqZNt4ERAZDmW4CT+tL8VtvinqywuANaFu4bOMWki16nqf0e4oC0QIaDr/g== dset@^3.1.2: - version "3.1.3" - resolved "https://registry.yarnpkg.com/dset/-/dset-3.1.3.tgz#c194147f159841148e8e34ca41f638556d9542d2" - integrity sha512-20TuZZHCEZ2O71q9/+8BwKwZ0QtD9D8ObhrihJPr+vLLYlSuAU3/zL4cSlgbfeoGHTjCSJBa7NGcrF9/Bx/WJQ== + version "3.1.4" + resolved "https://registry.yarnpkg.com/dset/-/dset-3.1.4.tgz#f8eaf5f023f068a036d08cd07dc9ffb7d0065248" + integrity sha512-2QF/g9/zTaPDc3BjNcVTGoBbXBgYfMTTceLaYcFJ/W9kggFUkhxD/hMEeuLKbugyef9SqAx8cpgwlIP/jinUTA== duplexer@^0.1.2: version "0.1.2" @@ -8215,9 +8215,9 @@ path-root@^0.1.1: path-root-regex "^0.1.0" path-to-regexp@^1.7.0: - version "1.8.0" - resolved "https://registry.yarnpkg.com/path-to-regexp/-/path-to-regexp-1.8.0.tgz#887b3ba9d84393e87a0a0b9f4cb756198b53548a" - integrity 
sha512-n43JRhlUKUAlibEJhPeir1ncUID16QnEjNpwzNdO3Lm4ywrBpBZ5oLD0I6br9evr1Y9JTqwRtAh7JLoOzAQdVA== + version "1.9.0" + resolved "https://registry.yarnpkg.com/path-to-regexp/-/path-to-regexp-1.9.0.tgz#5dc0753acbf8521ca2e0f137b4578b917b10cf24" + integrity sha512-xIp7/apCFJuUHdDLWe8O1HIkb0kQrOMb/0u6FXQjemHn/ii5LrIzU6bdECnsiTF/GjZkMEKg1xdiZwNqDYlZ6g== dependencies: isarray "0.0.1" diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index c9448fa34c687..6e3e5780506ac 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -125,7 +125,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev - ${DATAHUB_LOCAL_GMS_ENV:-empty2.env} environment: &datahub-gms-dev-env <<: [*datahub-dev-telemetry-env, *datahub-gms-env] - ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-/etc/datahub/search/search_config.yaml} + ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-search_config.yaml} SKIP_ELASTICSEARCH_CHECK: false JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001' BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE: false diff --git a/docs/cli.md b/docs/cli.md index 1f1e6dfa26be7..c109d02e0ad51 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -34,9 +34,9 @@ datahub init # authenticate your datahub CLI with your datahub instance ``` -If you run into an error, try checking the [_common setup issues_](../metadata-ingestion/developing.md#Common-setup-issues). +If you run into an error, try checking the [_common setup issues_](../metadata-ingestion/developing.md#common-setup-issues). 
-Other installation options such as installation from source and running the cli inside a container are available further below in the guide [here](#alternate-installation-options) +Other installation options such as installation from source and running the cli inside a container are available further below in the guide [here](#alternate-installation-options). ## Starter Commands @@ -672,7 +672,6 @@ Old Entities Migrated = {'urn:li:dataset:(urn:li:dataPlatform:hive,logging_event ### Using docker [![Docker Hub](https://img.shields.io/docker/pulls/acryldata/datahub-ingestion?style=plastic)](https://hub.docker.com/r/acryldata/datahub-ingestion) -[![datahub-ingestion docker](https://github.com/acryldata/datahub/workflows/datahub-ingestion%20docker/badge.svg)](https://github.com/acryldata/datahub/actions/workflows/docker-ingestion.yml) If you don't want to install locally, you can alternatively run metadata ingestion within a Docker container. We have prebuilt images available on [Docker hub](https://hub.docker.com/r/acryldata/datahub-ingestion). All plugins will be installed and enabled automatically. diff --git a/docs/how/restore-indices.md b/docs/how/restore-indices.md index 368b385ae5ea5..447e08c2dc6f0 100644 --- a/docs/how/restore-indices.md +++ b/docs/how/restore-indices.md @@ -21,6 +21,7 @@ datahub docker quickstart --restore-indices :::info Using the `datahub` CLI to restore the indices when using the quickstart images will also clear the search and graph indices before restoring. +::: See [this section](../quickstart.md#restore-datahub) for more information. @@ -34,6 +35,7 @@ If you are on a custom docker-compose deployment, run the following command (you :::info By default this command will not clear the search and graph indices before restoring, thous potentially leading to inconsistencies between the local database and the indices, in case aspects were previously deleted in the local database but were not removed from the correponding index. 
+::: If you need to clear the search and graph indices before restoring, add `-a clean` to the end of the command. Please take note that the search and graph services might not be fully functional during reindexing when the indices are cleared. @@ -67,6 +69,7 @@ Once the job completes, your indices will have been restored. :::info By default the restore indices job template will not clear the search and graph indices before restoring, thous potentially leading to inconsistencies between the local database and the indices, in case aspects were previously deleted in the local database but were not removed from the correponding index. +::: If you need to clear the search and graph indices before restoring, modify the `values.yaml` for your deployment and overwrite the default arguments of the restore indices job template to include the `-a clean` argument. Please take note that the search and graph services might not be fully functional during reindexing when the indices are cleared. diff --git a/docs/how/search.md b/docs/how/search.md index c809ab1efba12..5c1ba266ee2ae 100644 --- a/docs/how/search.md +++ b/docs/how/search.md @@ -85,8 +85,8 @@ These examples are non exhaustive and using Datasets as a reference. If you want to: - Exact match on term or phrase - - ```"datahub_schema"``` [Sample results](https://demo.datahubproject.io/search?page=1&query=%22datahub_schema%22) - - ```datahub_schema``` [Sample results](https://demo.datahubproject.io/search?page=1&query=datahub_schema) + - ```"pet profile"``` [Sample results](https://demo.datahubproject.io/search?page=1&query=%22pet%20profile%22) + - ```pet profile``` [Sample results](https://demo.datahubproject.io/search?page=1&query=pet%20profile) - Enclosing one or more terms with double quotes will enforce exact matching on these terms, preventing further tokenization. 
- Exclude terms diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java index 8777be57e1bd8..e999471488dd7 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java @@ -14,6 +14,7 @@ import java.util.Optional; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.AllArgsConstructor; import lombok.Data; import lombok.EqualsAndHashCode; @@ -36,6 +37,10 @@ public class Edge { @EqualsAndHashCode.Include private Urn lifecycleOwner; // An entity through which the edge between source and destination is created @EqualsAndHashCode.Include private Urn via; + @EqualsAndHashCode.Exclude @Nullable private Boolean sourceStatus; + @EqualsAndHashCode.Exclude @Nullable private Boolean destinationStatus; + @EqualsAndHashCode.Exclude @Nullable private Boolean viaStatus; + @EqualsAndHashCode.Exclude @Nullable private Boolean lifecycleOwnerStatus; // For backwards compatibility public Edge( @@ -57,6 +62,38 @@ public Edge( updatedActor, properties, null, + null, + null, + null, + null, + null); + } + + public Edge( + Urn source, + Urn destination, + String relationshipType, + Long createdOn, + Urn createdActor, + Long updatedOn, + Urn updatedActor, + Map properties, + Urn lifecycleOwner, + Urn via) { + this( + source, + destination, + relationshipType, + createdOn, + createdActor, + updatedOn, + updatedActor, + properties, + lifecycleOwner, + via, + null, + null, + null, null); } @@ -91,6 +128,10 @@ public String toDocId(@Nonnull String idHashAlgo) { public static final String EDGE_FIELD_LIFECYCLE_OWNER = "lifecycleOwner"; public static final String EDGE_SOURCE_URN_FIELD = "source.urn"; public static final String EDGE_DESTINATION_URN_FIELD = "destination.urn"; + public static 
final String EDGE_SOURCE_STATUS = "source.removed"; + public static final String EDGE_DESTINATION_STATUS = "destination.removed"; + public static final String EDGE_FIELD_VIA_STATUS = "viaRemoved"; + public static final String EDGE_FIELD_LIFECYCLE_OWNER_STATUS = "lifecycleOwnerRemoved"; public static final List> KEY_SORTS = ImmutableList.of( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/EdgeUrnType.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/EdgeUrnType.java new file mode 100644 index 0000000000000..2fc2f4b588e8b --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/EdgeUrnType.java @@ -0,0 +1,8 @@ +package com.linkedin.metadata.aspect.models.graph; + +public enum EdgeUrnType { + SOURCE, + DESTINATION, + VIA, + LIFECYCLE_OWNER +} diff --git a/gradle.properties b/gradle.properties index 4bdbd3d89286c..e42e18dab677b 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,10 +1,14 @@ -org.gradle.daemon=false org.gradle.configureondemand=true org.gradle.parallel=true org.gradle.caching=true +# Cycle daemons after 30m +org.gradle.daemon.idletimeout=1800000 + # Increase gradle JVM memory to 5GB to allow tests to run locally -org.gradle.jvmargs=-Xmx5120m +org.gradle.jvmargs=-Xmx5120m -XX:MaxMetaspaceSize=512m +org.gradle.workers.max=4 + # Increase retries to 5 (from default of 3) and increase interval from 125ms to 1s. # Based on this thread https://github.com/gradle/gradle/issues/4629, it's unclear # if we should be using systemProp or not. We're using both for now. 
diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authentication/Authentication.java b/metadata-auth/auth-api/src/main/java/com/datahub/authentication/Authentication.java index b53d868e6e878..7583a4efd6425 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authentication/Authentication.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authentication/Authentication.java @@ -4,8 +4,10 @@ import java.util.Map; import java.util.Objects; import javax.annotation.Nonnull; +import lombok.EqualsAndHashCode; /** Class representing an authenticated actor accessing DataHub. */ +@EqualsAndHashCode public class Authentication { private final Actor authenticatedActor; diff --git a/metadata-ingestion/developing.md b/metadata-ingestion/developing.md index 9293fc7a369dc..19a18c5275a3b 100644 --- a/metadata-ingestion/developing.md +++ b/metadata-ingestion/developing.md @@ -55,7 +55,6 @@ logger.debug("this is the sample debug line") #3. click on the `log` option ``` - > **P.S. if you are not able to see the log lines, then restart the `airflow scheduler` and rerun the DAG** ### (Optional) Set up your Python environment for developing on Dagster Plugin @@ -70,6 +69,7 @@ datahub version # should print "DataHub CLI version: unavailable (installed in ``` ### (Optional) Set up your Python environment for developing on Prefect Plugin + From the repository root: ```shell @@ -127,6 +127,18 @@ This sometimes happens if there's a version mismatch between the Kafka's C libra +
+ Conflict: acryl-datahub requires pydantic 1.10 + +The base `acryl-datahub` package supports both Pydantic 1.x and 2.x. However, some of our specific sources require Pydantic 1.x because of transitive dependencies. + +If you're primarily using `acryl-datahub` for the SDKs, you can install `acryl-datahub` and some extras, like `acryl-datahub[sql-parser]`, without getting conflicts related to Pydantic versioning. + +We recommend not installing full ingestion sources into your main environment (e.g. avoid having a dependency on `acryl-datahub[snowflake]` or other ingestion sources). +Instead, we recommend using UI-based ingestion or isolating the ingestion pipelines using [virtual environments](https://docs.python.org/3/library/venv.html). If you're using an orchestrator, they often have first-class support for virtual environments - here's an [example for Airflow](./schedule_docs/airflow.md). + +
+ ### Using Plugins in Development The syntax for installing plugins is slightly different in development. For example: @@ -286,4 +298,4 @@ tox -- --update-golden-files # Update golden files for a specific environment. tox -e py310-airflow26 -- --update-golden-files -``` \ No newline at end of file +``` diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 8b778048c3475..bf80172441405 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -101,7 +101,7 @@ sqlglot_lib = { # Using an Acryl fork of sqlglot. # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:main?expand=1 - "acryl-sqlglot[rs]==25.20.2.dev5", + "acryl-sqlglot[rs]==25.20.2.dev6", } classification_lib = { diff --git a/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/__init__.py b/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py b/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py new file mode 100644 index 0000000000000..fc164c8479365 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py @@ -0,0 +1,133 @@ +import dataclasses +import json +from typing import Dict, Iterable, Optional + +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.metadata.schema_classes import ( + ChangeTypeClass, + DatasetPropertiesClass, + GenericAspectClass, + MetadataChangeProposalClass, + OperationClass, + TimeStampClass, +) +from datahub.specific.dataset import DatasetPatchBuilder +from datahub.utilities.urns.urn import guess_entity_type + + +@dataclasses.dataclass +class TimestampPair: + last_modified_dataset_props: Optional[ + TimeStampClass + ] # last_modified of datasetProperties aspect + last_updated_timestamp_dataset_props: Optional[ + 
int + ] # lastUpdatedTimestamp of the operation aspect + + +def try_aspect_from_metadata_change_proposal_class( + wu: MetadataWorkUnit, +) -> Optional[DatasetPropertiesClass]: + if ( + isinstance(wu.metadata, MetadataChangeProposalClass) + and wu.metadata.aspectName == "datasetProperties" + and wu.metadata.changeType == ChangeTypeClass.PATCH + and isinstance(wu.metadata.aspect, GenericAspectClass) + ): + patch_dataset_properties = json.loads(wu.metadata.aspect.value) + for operation in patch_dataset_properties: + if operation.get("path") == "/lastModified": + # Deserializing `lastModified` as the `auto_patch_last_modified` function relies on this property + # to decide if a patch aspect for the datasetProperties aspect should be generated + return DatasetPropertiesClass( + lastModified=TimeStampClass(time=operation["value"]["time"]) + ) + + return None + + +def auto_patch_last_modified( + stream: Iterable[MetadataWorkUnit], +) -> Iterable[MetadataWorkUnit]: + """ + Generate a patch request for datasetProperties aspect in-case + 1. `lastModified` of datasetProperties is not set + 2. And there are operation aspects + in this case set the `lastModified` of datasetProperties to max value of operation aspects `lastUpdatedTimestamp`. + + We need this functionality to support sort by `last modified` on UI. 
+ """ + candidate_dataset_for_patch: Dict[str, TimestampPair] = {} + + for wu in stream: + if ( + guess_entity_type(wu.get_urn()) != "dataset" + ): # we are only processing datasets + yield wu + continue + + dataset_properties_aspect = wu.get_aspect_of_type( + DatasetPropertiesClass + ) or try_aspect_from_metadata_change_proposal_class(wu) + dataset_operation_aspect = wu.get_aspect_of_type(OperationClass) + + timestamp_pair = candidate_dataset_for_patch.get(wu.get_urn()) + + if timestamp_pair: + # Update the timestamp_pair + if dataset_properties_aspect and dataset_properties_aspect.lastModified: + timestamp_pair.last_modified_dataset_props = ( + dataset_properties_aspect.lastModified + ) + + if ( + dataset_operation_aspect + and dataset_operation_aspect.lastUpdatedTimestamp + ): + timestamp_pair.last_updated_timestamp_dataset_props = max( + timestamp_pair.last_updated_timestamp_dataset_props or 0, + dataset_operation_aspect.lastUpdatedTimestamp, + ) + + else: + # Create new TimestampPair + last_modified_dataset_props: Optional[TimeStampClass] = None + last_updated_timestamp_dataset_props: Optional[int] = None + + if dataset_properties_aspect: + last_modified_dataset_props = dataset_properties_aspect.lastModified + + if dataset_operation_aspect: + last_updated_timestamp_dataset_props = ( + dataset_operation_aspect.lastUpdatedTimestamp + ) + + candidate_dataset_for_patch[wu.get_urn()] = TimestampPair( + last_modified_dataset_props=last_modified_dataset_props, + last_updated_timestamp_dataset_props=last_updated_timestamp_dataset_props, + ) + + yield wu + + # Emit a patch datasetProperties aspect for dataset where last_modified is None + for entity_urn, timestamp_pair in candidate_dataset_for_patch.items(): + # Emit patch if last_modified is not set and last_updated_timestamp is set + if ( + timestamp_pair.last_modified_dataset_props is None + and timestamp_pair.last_updated_timestamp_dataset_props + ): + dataset_patch_builder = DatasetPatchBuilder(urn=entity_urn) + + 
dataset_patch_builder.set_last_modified( + timestamp=TimeStampClass( + time=timestamp_pair.last_updated_timestamp_dataset_props + ) + ) + + yield from [ + MetadataWorkUnit( + id=MetadataWorkUnit.generate_workunit_id(mcp), + mcp_raw=mcp, + ) + for mcp in dataset_patch_builder.build() + ] diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index 3dea3d36f41f1..85ae17ddf6529 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -28,6 +28,9 @@ from datahub.configuration.common import ConfigModel from datahub.configuration.source_common import PlatformInstanceConfigMixin from datahub.emitter.mcp_builder import mcps_from_mce +from datahub.ingestion.api.auto_work_units.auto_dataset_properties_aspect import ( + auto_patch_last_modified, +) from datahub.ingestion.api.closeable import Closeable from datahub.ingestion.api.common import PipelineContext, RecordEnvelope, WorkUnit from datahub.ingestion.api.report import Report @@ -443,6 +446,7 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: ), browse_path_processor, partial(auto_workunit_reporter, self.get_report()), + auto_patch_last_modified, ] @staticmethod diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index c783d9a35814b..0fdb7bb537457 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -527,6 +527,50 @@ def get_aspects_for_entity( return result + def get_entity_as_mcps( + self, entity_urn: str, aspects: Optional[List[str]] = None + ) -> List[MetadataChangeProposalWrapper]: + """Get all non-timeseries aspects for an entity. + + By formatting the entity's aspects as MCPWs, we can also include SystemMetadata. + + Warning: Do not use this method to determine if an entity exists! 
This method will always return + something, even if the entity doesn't actually exist in DataHub. + + Args: + entity_urn: The urn of the entity + aspects: Optional list of aspect names being requested (e.g. ["schemaMetadata", "datasetProperties"]) + + Returns: + A list of MCPWs. + """ + + response_json = self.get_entity_raw(entity_urn, aspects) + + # Now, we parse the response into proper aspect objects. + results: List[MetadataChangeProposalWrapper] = [] + for aspect_name, aspect_json in response_json.get("aspects", {}).items(): + aspect_type = ASPECT_NAME_MAP.get(aspect_name) + if aspect_type is None: + logger.warning(f"Ignoring unknown aspect type {aspect_name}") + continue + + post_json_obj = post_json_transform(aspect_json) + aspect_value = aspect_type.from_obj(post_json_obj["value"]) + + system_metadata_raw = post_json_obj["systemMetadata"] + system_metadata = SystemMetadataClass.from_obj(system_metadata_raw) + + mcpw = MetadataChangeProposalWrapper( + entityUrn=entity_urn, + aspect=aspect_value, + systemMetadata=system_metadata, + ) + + results.append(mcpw) + + return results + def get_entity_semityped( self, entity_urn: str, aspects: Optional[List[str]] = None ) -> AspectBag: @@ -545,19 +589,12 @@ def get_entity_semityped( not be present in the dictionary. The entity's key aspect will always be present. """ - response_json = self.get_entity_raw(entity_urn, aspects) + mcps = self.get_entity_as_mcps(entity_urn, aspects) - # Now, we parse the response into proper aspect objects. 
result: AspectBag = {} - for aspect_name, aspect_json in response_json.get("aspects", {}).items(): - aspect_type = ASPECT_NAME_MAP.get(aspect_name) - if aspect_type is None: - logger.warning(f"Ignoring unknown aspect type {aspect_name}") - continue - - post_json_obj = post_json_transform(aspect_json) - aspect_value = aspect_type.from_obj(post_json_obj["value"]) - result[aspect_name] = aspect_value # type: ignore + for mcp in mcps: + if mcp.aspect: + result[mcp.aspect.get_aspect_name()] = mcp.aspect # type: ignore return result diff --git a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py index 3d50ef5f254a0..9059dcca3e2b8 100644 --- a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py +++ b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py @@ -74,6 +74,7 @@ class DatahubRestSinkConfig(DatahubClientConfig): @dataclasses.dataclass class DataHubRestSinkReport(SinkReport): + mode: Optional[RestSinkMode] = None max_threads: Optional[int] = None gms_version: Optional[str] = None pending_requests: int = 0 @@ -126,6 +127,7 @@ def __post_init__(self) -> None: .get("acryldata/datahub", {}) .get("version", None) ) + self.report.mode = self.config.mode self.report.max_threads = self.config.max_threads logger.debug("Setting env variables to override config") logger.debug("Setting gms config") diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py index 1866599fa21c6..b39e05a8db4de 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py @@ -75,8 +75,37 @@ def set_metadata_endpoint(cls, values: dict) -> dict: def infer_metadata_endpoint(access_url: str) -> Optional[str]: - # See https://docs.getdbt.com/docs/cloud/about-cloud/access-regions-ip-addresses#api-access-urls - # and 
https://docs.getdbt.com/docs/dbt-cloud-apis/discovery-querying#discovery-api-endpoints + """Infer the dbt metadata endpoint from the access URL. + + See https://docs.getdbt.com/docs/cloud/about-cloud/access-regions-ip-addresses#api-access-urls + and https://docs.getdbt.com/docs/dbt-cloud-apis/discovery-querying#discovery-api-endpoints + for more information. + + Args: + access_url: The dbt Cloud access URL. This is the URL of the dbt Cloud UI. + + Returns: + The metadata endpoint, or None if it couldn't be inferred. + + Examples: + # Standard multi-tenant deployments. + >>> infer_metadata_endpoint("https://cloud.getdbt.com") + 'https://metadata.cloud.getdbt.com/graphql' + + >>> infer_metadata_endpoint("https://au.dbt.com") + 'https://metadata.au.dbt.com/graphql' + + >>> infer_metadata_endpoint("https://emea.dbt.com") + 'https://metadata.emea.dbt.com/graphql' + + # Cell-based deployment. + >>> infer_metadata_endpoint("https://prefix.us1.dbt.com") + 'https://prefix.metadata.us1.dbt.com/graphql' + + # Test with an "internal" URL. + >>> infer_metadata_endpoint("http://dbt.corp.internal") + 'http://metadata.dbt.corp.internal/graphql' + """ try: parsed_uri = urlparse(access_url) @@ -86,13 +115,18 @@ def infer_metadata_endpoint(access_url: str) -> Optional[str]: logger.debug(f"Unable to parse access URL {access_url}: {e}", exc_info=e) return None - if parsed_uri.hostname.endswith(".dbt.com"): + if parsed_uri.hostname.endswith(".getdbt.com") or parsed_uri.hostname in { + # Two special cases of multi-tenant deployments that use the dbt.com domain + # instead of getdbt.com. + "au.dbt.com", + "emea.dbt.com", + }: + return f"{parsed_uri.scheme}://metadata.{parsed_uri.netloc}/graphql" + elif parsed_uri.hostname.endswith(".dbt.com"): # For cell-based deployments. 
# prefix.region.dbt.com -> prefix.metadata.region.dbt.com hostname_parts = parsed_uri.hostname.split(".", maxsplit=1) return f"{parsed_uri.scheme}://{hostname_parts[0]}.metadata.{hostname_parts[1]}/graphql" - elif parsed_uri.hostname.endswith(".getdbt.com"): - return f"{parsed_uri.scheme}://metadata.{parsed_uri.netloc}/graphql" else: # The self-hosted variants also have the metadata. prefix. return f"{parsed_uri.scheme}://metadata.{parsed_uri.netloc}/graphql" @@ -403,10 +437,12 @@ def _parse_into_dbt_node(self, node: Dict) -> DBTNode: columns = [] if "columns" in node and node["columns"] is not None: # columns will be empty for ephemeral models - columns = [ - self._parse_into_dbt_column(column) - for column in sorted(node["columns"], key=lambda c: c["index"]) - ] + columns = list( + sorted( + [self._parse_into_dbt_column(column) for column in node["columns"]], + key=lambda c: c.index, + ) + ) test_info = None test_result = None @@ -494,7 +530,10 @@ def _parse_into_dbt_column( name=column["name"], comment=column.get("comment", ""), description=column["description"], - index=column["index"], + # For some reason, the index sometimes comes back as None from the dbt Cloud API. + # In that case, we just assume that the column is at the end of the table by + # assigning it a very large index. 
+ index=column["index"] if column["index"] is not None else 10**6, data_type=column["type"], meta=column["meta"], tags=column["tags"], diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py index 1aad806e958f8..04de763370c95 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py @@ -7,6 +7,7 @@ import dateutil.parser import requests +from packaging import version from pydantic import BaseModel, Field, validator from datahub.configuration.git import GitReference @@ -41,14 +42,17 @@ class DBTCoreConfig(DBTCommonConfig): manifest_path: str = Field( - description="Path to dbt manifest JSON. See https://docs.getdbt.com/reference/artifacts/manifest-json Note this can be a local file or a URI." + description="Path to dbt manifest JSON. See https://docs.getdbt.com/reference/artifacts/manifest-json Note " + "this can be a local file or a URI." ) catalog_path: str = Field( - description="Path to dbt catalog JSON. See https://docs.getdbt.com/reference/artifacts/catalog-json Note this can be a local file or a URI." + description="Path to dbt catalog JSON. See https://docs.getdbt.com/reference/artifacts/catalog-json Note this " + "can be a local file or a URI." ) sources_path: Optional[str] = Field( default=None, - description="Path to dbt sources JSON. See https://docs.getdbt.com/reference/artifacts/sources-json. If not specified, last-modified fields will not be populated. Note this can be a local file or a URI.", + description="Path to dbt sources JSON. See https://docs.getdbt.com/reference/artifacts/sources-json. If not " + "specified, last-modified fields will not be populated. 
Note this can be a local file or a URI.", ) run_results_paths: List[str] = Field( default=[], @@ -569,16 +573,26 @@ def load_nodes(self) -> Tuple[List[DBTNode], Dict[str, Optional[str]]]: ) = self.loadManifestAndCatalog() # If catalog_version is between 1.7.0 and 1.7.2, report a warning. - if ( - catalog_version - and catalog_version.startswith("1.7.") - and catalog_version < "1.7.3" - ): - self.report.report_warning( - "dbt_catalog_version", - f"Due to a bug in dbt, dbt version {catalog_version} will have incomplete metadata on sources. " - "Please upgrade to dbt version 1.7.3 or later. " - "See https://github.com/dbt-labs/dbt-core/issues/9119 for details on the bug.", + try: + if ( + catalog_version + and catalog_version.startswith("1.7.") + and version.parse(catalog_version) < version.parse("1.7.3") + ): + self.report.report_warning( + title="Dbt Catalog Version", + message="Due to a bug in dbt version between 1.7.0 and 1.7.2, you will have incomplete metadata " + "source", + context=f"Due to a bug in dbt, dbt version {catalog_version} will have incomplete metadata on " + f"sources." + "Please upgrade to dbt version 1.7.3 or later. 
" + "See https://github.com/dbt-labs/dbt-core/issues/9119 for details on the bug.", + ) + except Exception as e: + self.report.info( + title="Dbt Catalog Version", + message="Failed to determine the catalog version", + exc=e, ) additional_custom_props = { diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index 73427d9084dd3..56b8ce00a4d1f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -15,6 +15,8 @@ import yaml from liquid import Template, Undefined from pydantic import Field, validator +from requests.adapters import HTTPAdapter, Retry +from requests.exceptions import ConnectionError from requests.models import HTTPBasicAuth, HTTPError from sqllineage.runner import LineageRunner from tenacity import retry_if_exception_type, stop_after_attempt, wait_exponential @@ -127,6 +129,10 @@ class ModeAPIConfig(ConfigModel): max_attempts: int = Field( default=5, description="Maximum number of attempts to retry before failing" ) + timeout: int = Field( + default=40, + description="Timout setting, how long to wait for the Mode rest api to send data before giving up", + ) class ModeConfig(StatefulIngestionConfigBase, DatasetLineageProviderConfigBase): @@ -299,7 +305,15 @@ def __init__(self, ctx: PipelineContext, config: ModeConfig): self.report = ModeSourceReport() self.ctx = ctx - self.session = requests.session() + self.session = requests.Session() + # Handling retry and backoff + retries = 3 + backoff_factor = 10 + retry = Retry(total=retries, backoff_factor=backoff_factor) + adapter = HTTPAdapter(max_retries=retry) + self.session.mount("http://", adapter) + self.session.mount("https://", adapter) + self.session.auth = HTTPBasicAuth( self.config.token, self.config.password.get_secret_value(), @@ -1469,15 +1483,16 @@ def _get_request_json(self, url: str) -> Dict: 
multiplier=self.config.api_options.retry_backoff_multiplier, max=self.config.api_options.max_retry_interval, ), - retry=retry_if_exception_type(HTTPError429), + retry=retry_if_exception_type((HTTPError429, ConnectionError)), stop=stop_after_attempt(self.config.api_options.max_attempts), ) @r.wraps def get_request(): try: - response = self.session.get(url) - response.raise_for_status() + response = self.session.get( + url, timeout=self.config.api_options.timeout + ) return response.json() except HTTPError as http_error: error_response = http_error.response diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py index 56e4c806eb0c3..71245353101f6 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py @@ -347,6 +347,9 @@ def detach_ctes( dialect = get_dialect(platform) statement = parse_statement(sql, dialect=dialect) + if not cte_mapping: + return statement + def replace_cte_refs(node: sqlglot.exp.Expression) -> sqlglot.exp.Expression: if ( isinstance(node, sqlglot.exp.Identifier) diff --git a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_lineage_usage_golden.json b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_lineage_usage_golden.json index a7d46a2412b6c..631b28c64f14d 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_lineage_usage_golden.json +++ b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_lineage_usage_golden.json @@ -498,5 +498,27 @@ "runId": "bigquery-2022_02_03-07_00_00", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1643871600000 + } + } + ] + }, + 
"systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_queries_mcps_golden.json b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_queries_mcps_golden.json index a2d21b84f19e8..1b79e8464c05f 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_queries_mcps_golden.json +++ b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_queries_mcps_golden.json @@ -12588,5 +12588,555 @@ "runId": "bigquery-queries-2024_08_19-07_00_00", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.derived_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322481569 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_external_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322505477 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_view_on_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322464098 + } + } + ] + 
}, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_sharded_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322500148 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_wildcard_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322502689 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.destination_table_of_select_query,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322510656 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_another_project,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322478955 + } + } + ] + }, + 
"systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_timetravelled_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322508214 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_base,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322460257 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view_and_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322472836 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.sharded_table1,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322491425 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 
1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_tmp_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322457731 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_snapshot_on_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322471500 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.materialized_view_from_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322476091 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_multiple_tables,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322484293 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": 
"bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322465459 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_ingestion_time_partition,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322495660 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db_2.table_from_other_db,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322467835 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322462741 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_nested_fields,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322498418 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.base_table_2,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322477705 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_integer_range_partition,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322497080 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322452660 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.partition_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322448864 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.usage_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322445357 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json b/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json index f7ee62201a863..95671b4f5a09c 100644 --- a/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json +++ b/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json @@ -400,5 +400,27 @@ "runId": "delta-lake-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-test-bucket/delta_tables/sales,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655664815399 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1672531200000, + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git 
a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json index 24344d6a26664..0e88106d79175 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json @@ -1838,5 +1838,93 @@ "runId": "allow_table.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831477768 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655664815399 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/my_table_no_name,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831649788 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/level1/my_table_inner,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831866541 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json index 717481b253429..d04cc78971190 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json @@ -1785,5 +1785,93 @@ "runId": "inner_table.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831477768 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655664815399 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + 
"runId": "inner_table.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/my_table_no_name,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831649788 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/level1/my_table_inner,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831866541 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json index f446c2deb6a84..0b1a8140cd649 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json @@ -368,5 +368,27 @@ "runId": "relative_path.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,delta_tables/my_table_basic,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831477768 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 
1615443388097, + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json index 100f93fdaf5d3..cba70b2f54b18 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json @@ -724,5 +724,27 @@ "runId": "single_table.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831477768 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_tables_with_nested_datatypes.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_tables_with_nested_datatypes.json index cb40e152f67cc..28e517cc8c319 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_tables_with_nested_datatypes.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_tables_with_nested_datatypes.json @@ -1841,5 +1841,137 @@ "runId": "tables_with_nested_datatypes.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_nested_struct_1,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1709535906725 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_nested_struct,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1709110542636 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_string_and_array,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1708329078869 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_string_and_array_of_struct,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1708329897384 + } + } + ] + }, + 
"systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_nested_struct_2,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1709536366367 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_string_and_nested_array_of_numbers,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1708330178404 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mode/mode_mces_golden.json b/metadata-ingestion/tests/integration/mode/mode_mces_golden.json index a6a685672bda0..ed00dc5734680 100644 --- a/metadata-ingestion/tests/integration/mode/mode_mces_golden.json +++ b/metadata-ingestion/tests/integration/mode/mode_mces_golden.json @@ -806,9 +806,9 @@ "json": { "fields": [ { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),amount)", + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),payment_date)", "schemaField": { - "fieldPath": "amount", + "fieldPath": "payment_date", "nullable": false, "type": { "type": { @@ -828,9 +828,9 @@ } 
}, { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),payment_date)", + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),amount)", "schemaField": { - "fieldPath": "payment_date", + "fieldPath": "amount", "nullable": false, "type": { "type": { @@ -1075,5 +1075,27 @@ "runId": "mode-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1639177973273 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "mode-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mode/test_mode.py b/metadata-ingestion/tests/integration/mode/test_mode.py index 7ea6597460de2..ce7533d5611e4 100644 --- a/metadata-ingestion/tests/integration/mode/test_mode.py +++ b/metadata-ingestion/tests/integration/mode/test_mode.py @@ -45,8 +45,12 @@ def __init__(self, error_list, status_code): def json(self): return self.json_data - def get(self, url): + def mount(self, prefix, adaptor): + return self + + def get(self, url, timeout=40): self.url = url + self.timeout = timeout response_json_path = f"{test_resources_dir}/setup/{JSON_RESPONSE_MAP.get(url)}" with open(response_json_path) as file: data = json.loads(file.read()) @@ -74,7 +78,7 @@ def mocked_requests_failure(*args, **kwargs): @freeze_time(FROZEN_TIME) def test_mode_ingest_success(pytestconfig, tmp_path): with patch( - "datahub.ingestion.source.mode.requests.session", + "datahub.ingestion.source.mode.requests.Session", side_effect=mocked_requests_sucess, ): pipeline = Pipeline.create( @@ -111,7 +115,7 @@ def test_mode_ingest_success(pytestconfig, tmp_path): @freeze_time(FROZEN_TIME) def 
test_mode_ingest_failure(pytestconfig, tmp_path): with patch( - "datahub.ingestion.source.mode.requests.session", + "datahub.ingestion.source.mode.requests.Session", side_effect=mocked_requests_failure, ): global test_resources_dir diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json index 77db69d197c78..7c0ecd8a07ddf 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json @@ -146,7 +146,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445129021 + "lastUpdatedTimestamp": 1586808250000 } }, "systemMetadata": { @@ -1370,5 +1370,27 @@ "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/no_extension/small,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808250000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json index b35fb24d43bf3..fb1ab3a869648 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json @@ 
-48,7 +48,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -60,7 +60,7 @@ "isPartOfKey": false }, { - "fieldPath": "height", + "fieldPath": "weight", "nullable": false, "type": { "type": { @@ -72,26 +72,26 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "height", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "color", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false } @@ -117,7 +117,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445089021 + "lastUpdatedTimestamp": 1586808220000 } }, "systemMetadata": { @@ -1307,7 +1307,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -1319,14 +1319,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -1343,7 +1343,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -1355,14 +1355,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -1388,7 +1388,7 @@ "type": "FULL_TABLE" }, "operationType": 
"UPDATE", - "lastUpdatedTimestamp": 1688445119021 + "lastUpdatedTimestamp": 1586808240000 } }, "systemMetadata": { @@ -1798,5 +1798,49 @@ "runId": "folder_no_partition.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808220000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808240000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_exclude.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_exclude.json index 62ba688990e2b..5c330a1953549 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_exclude.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_exclude.json @@ -48,7 +48,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -60,14 +60,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - 
"com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -84,7 +84,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -96,14 +96,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -129,7 +129,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445119021 + "lastUpdatedTimestamp": 1586808240000 } }, "systemMetadata": { @@ -1458,5 +1458,27 @@ "runId": "folder_no_partition_exclude.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808240000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json index 346c2e9bcd83a..a14cfdfb6f635 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json @@ -48,7 +48,7 @@ }, "fields": [ { - "fieldPath": 
"color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -60,7 +60,7 @@ "isPartOfKey": false }, { - "fieldPath": "height", + "fieldPath": "weight", "nullable": false, "type": { "type": { @@ -72,26 +72,26 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "height", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "color", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false } @@ -117,7 +117,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445089021 + "lastUpdatedTimestamp": 1586808220000 } }, "systemMetadata": { @@ -1307,7 +1307,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -1319,14 +1319,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -1343,7 +1343,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -1355,14 +1355,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -1388,7 +1388,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445119021 
+ "lastUpdatedTimestamp": 1586808240000 } }, "systemMetadata": { @@ -1798,5 +1798,49 @@ "runId": "folder_no_partition_filename.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808220000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808240000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_glob.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_glob.json index 6bff1bf1b1468..e695804f24f5d 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_glob.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_glob.json @@ -48,7 +48,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -60,14 +60,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} 
+ "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -84,7 +84,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -96,14 +96,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -129,7 +129,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445119021 + "lastUpdatedTimestamp": 1586808240000 } }, "systemMetadata": { @@ -1458,5 +1458,27 @@ "runId": "folder_no_partition_glob.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808240000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_basic.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_basic.json index 6668e4e6a26c0..4b78aae2a3642 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_basic.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_basic.json @@ -48,31 +48,43 @@ }, "fields": [ { - "fieldPath": "effect_changes", + "fieldPath": 
"effect_changes.effect_entries.effect", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries", + "fieldPath": "effect_changes.effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.effect", + "fieldPath": "effect_changes.effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.is_native", "nullable": false, "type": { "type": { @@ -96,19 +108,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.is_native", + "fieldPath": "effect_changes.effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.name", + "fieldPath": "effect_changes.version_group.name", "nullable": false, "type": { "type": { @@ -120,7 +132,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.url", + "fieldPath": "effect_changes.version_group.url", "nullable": false, "type": { "type": { @@ -144,19 +156,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.name", + "fieldPath": "effect_changes", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": 
"str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.url", + "fieldPath": "effect_entries.effect", "nullable": false, "type": { "type": { @@ -168,19 +180,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries", + "fieldPath": "effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.effect", + "fieldPath": "effect_entries.language.url", "nullable": false, "type": { "type": { @@ -204,7 +216,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.name", + "fieldPath": "effect_entries.short_effect", "nullable": false, "type": { "type": { @@ -216,19 +228,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.url", + "fieldPath": "effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.short_effect", + "fieldPath": "flavor_text_entries.flavor_text", "nullable": false, "type": { "type": { @@ -240,19 +252,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries", + "fieldPath": "flavor_text_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.flavor_text", + "fieldPath": "flavor_text_entries.language.url", "nullable": false, "type": { "type": { @@ -276,7 +288,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.name", + "fieldPath": 
"flavor_text_entries.version_group.name", "nullable": false, "type": { "type": { @@ -288,7 +300,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.url", + "fieldPath": "flavor_text_entries.version_group.url", "nullable": false, "type": { "type": { @@ -312,19 +324,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.name", + "fieldPath": "flavor_text_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.url", + "fieldPath": "generation.name", "nullable": false, "type": { "type": { @@ -336,19 +348,7 @@ "isPartOfKey": false }, { - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", + "fieldPath": "generation.url", "nullable": false, "type": { "type": { @@ -360,14 +360,14 @@ "isPartOfKey": false }, { - "fieldPath": "generation.url", + "fieldPath": "generation", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -408,31 +408,19 @@ "isPartOfKey": false }, { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", + "fieldPath": "names.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - 
"fieldPath": "names.language.name", + "fieldPath": "names.language.url", "nullable": false, "type": { "type": { @@ -444,14 +432,14 @@ "isPartOfKey": false }, { - "fieldPath": "names.language.url", + "fieldPath": "names.language", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -468,7 +456,7 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon", + "fieldPath": "names", "nullable": false, "type": { "type": { @@ -492,19 +480,19 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon", + "fieldPath": "pokemon.pokemon.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.name", + "fieldPath": "pokemon.pokemon.url", "nullable": false, "type": { "type": { @@ -516,14 +504,14 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.url", + "fieldPath": "pokemon.pokemon", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -538,6 +526,18 @@ "nativeDataType": "int", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false } ] } @@ -561,7 +561,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445259021 + "lastUpdatedTimestamp": 1586808380000 } }, "systemMetadata": { @@ -1711,5 +1711,27 @@ "runId": "folder_partition_basic.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808380000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_keyval.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_keyval.json index 7f8bcfec6d314..201828842b84b 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_keyval.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_keyval.json @@ -48,31 +48,43 @@ }, "fields": [ { - "fieldPath": "effect_changes", + "fieldPath": "effect_changes.effect_entries.effect", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries", + "fieldPath": "effect_changes.effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.effect", + "fieldPath": "effect_changes.effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": 
"effect_changes.effect_entries.language.is_native", "nullable": false, "type": { "type": { @@ -96,19 +108,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.is_native", + "fieldPath": "effect_changes.effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.name", + "fieldPath": "effect_changes.version_group.name", "nullable": false, "type": { "type": { @@ -120,7 +132,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.url", + "fieldPath": "effect_changes.version_group.url", "nullable": false, "type": { "type": { @@ -144,19 +156,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.name", + "fieldPath": "effect_changes", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.url", + "fieldPath": "effect_entries.effect", "nullable": false, "type": { "type": { @@ -168,19 +180,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries", + "fieldPath": "effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.effect", + "fieldPath": "effect_entries.language.url", "nullable": false, "type": { "type": { @@ -204,7 +216,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.name", + "fieldPath": "effect_entries.short_effect", "nullable": false, "type": { "type": { @@ -216,19 +228,19 @@ "isPartOfKey": 
false }, { - "fieldPath": "effect_entries.language.url", + "fieldPath": "effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.short_effect", + "fieldPath": "flavor_text_entries.flavor_text", "nullable": false, "type": { "type": { @@ -240,19 +252,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries", + "fieldPath": "flavor_text_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.flavor_text", + "fieldPath": "flavor_text_entries.language.url", "nullable": false, "type": { "type": { @@ -276,7 +288,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.name", + "fieldPath": "flavor_text_entries.version_group.name", "nullable": false, "type": { "type": { @@ -288,7 +300,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.url", + "fieldPath": "flavor_text_entries.version_group.url", "nullable": false, "type": { "type": { @@ -312,19 +324,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.name", + "fieldPath": "flavor_text_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.url", + "fieldPath": "generation.name", "nullable": false, "type": { "type": { @@ -336,19 +348,7 @@ "isPartOfKey": false }, { - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - 
"nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", + "fieldPath": "generation.url", "nullable": false, "type": { "type": { @@ -360,14 +360,14 @@ "isPartOfKey": false }, { - "fieldPath": "generation.url", + "fieldPath": "generation", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -408,31 +408,19 @@ "isPartOfKey": false }, { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", + "fieldPath": "names.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "names.language.name", + "fieldPath": "names.language.url", "nullable": false, "type": { "type": { @@ -444,14 +432,14 @@ "isPartOfKey": false }, { - "fieldPath": "names.language.url", + "fieldPath": "names.language", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -468,7 +456,7 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon", + "fieldPath": "names", "nullable": false, "type": { "type": { @@ -492,19 +480,19 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon", + "fieldPath": "pokemon.pokemon.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - 
"fieldPath": "pokemon.pokemon.name", + "fieldPath": "pokemon.pokemon.url", "nullable": false, "type": { "type": { @@ -516,14 +504,14 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.url", + "fieldPath": "pokemon.pokemon", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -538,6 +526,18 @@ "nativeDataType": "int", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false } ] } @@ -561,7 +561,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445259021 + "lastUpdatedTimestamp": 1586808380000 } }, "systemMetadata": { @@ -1711,5 +1711,27 @@ "runId": "folder_partition_keyval.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808380000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema.json index 35efe52994837..52aead01fb2fc 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema.json +++ 
b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema.json @@ -48,31 +48,43 @@ }, "fields": [ { - "fieldPath": "effect_changes", + "fieldPath": "effect_changes.effect_entries.effect", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries", + "fieldPath": "effect_changes.effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.effect", + "fieldPath": "effect_changes.effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.is_native", "nullable": false, "type": { "type": { @@ -96,19 +108,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.is_native", + "fieldPath": "effect_changes.effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.name", + "fieldPath": "effect_changes.version_group.name", "nullable": false, "type": { "type": { @@ -120,7 +132,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.url", + "fieldPath": "effect_changes.version_group.url", "nullable": false, "type": { "type": { @@ -144,19 +156,19 @@ "isPartOfKey": false }, { - "fieldPath": 
"effect_changes.version_group.name", + "fieldPath": "effect_changes", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.url", + "fieldPath": "effect_entries.effect", "nullable": false, "type": { "type": { @@ -168,19 +180,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries", + "fieldPath": "effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.effect", + "fieldPath": "effect_entries.language.url", "nullable": false, "type": { "type": { @@ -204,7 +216,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.name", + "fieldPath": "effect_entries.short_effect", "nullable": false, "type": { "type": { @@ -216,19 +228,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.url", + "fieldPath": "effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.short_effect", + "fieldPath": "flavor_text_entries.flavor_text", "nullable": false, "type": { "type": { @@ -240,19 +252,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries", + "fieldPath": "flavor_text_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.flavor_text", + "fieldPath": 
"flavor_text_entries.language.url", "nullable": false, "type": { "type": { @@ -276,7 +288,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.name", + "fieldPath": "flavor_text_entries.version_group.name", "nullable": false, "type": { "type": { @@ -288,7 +300,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.url", + "fieldPath": "flavor_text_entries.version_group.url", "nullable": false, "type": { "type": { @@ -312,19 +324,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.name", + "fieldPath": "flavor_text_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.url", + "fieldPath": "generation.name", "nullable": false, "type": { "type": { @@ -336,19 +348,7 @@ "isPartOfKey": false }, { - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", + "fieldPath": "generation.url", "nullable": false, "type": { "type": { @@ -360,14 +360,14 @@ "isPartOfKey": false }, { - "fieldPath": "generation.url", + "fieldPath": "generation", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -408,31 +408,19 @@ "isPartOfKey": false }, { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", + "fieldPath": "names.language.name", "nullable": false, "type": { "type": { 
- "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "names.language.name", + "fieldPath": "names.language.url", "nullable": false, "type": { "type": { @@ -444,14 +432,14 @@ "isPartOfKey": false }, { - "fieldPath": "names.language.url", + "fieldPath": "names.language", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -468,7 +456,7 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon", + "fieldPath": "names", "nullable": false, "type": { "type": { @@ -492,19 +480,19 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon", + "fieldPath": "pokemon.pokemon.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.name", + "fieldPath": "pokemon.pokemon.url", "nullable": false, "type": { "type": { @@ -516,14 +504,14 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.url", + "fieldPath": "pokemon.pokemon", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -538,6 +526,18 @@ "nativeDataType": "int", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false } ] } @@ -561,7 +561,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445259021 + 
"lastUpdatedTimestamp": 1586808380000 } }, "systemMetadata": { @@ -1711,5 +1711,27 @@ "runId": "folder_partition_update_schema.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808380000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema_with_partition_autodetect.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema_with_partition_autodetect.json deleted file mode 100644 index adb3686309e6c..0000000000000 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema_with_partition_autodetect.json +++ /dev/null @@ -1,2572 +0,0 @@ -[ -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv", - "number_of_files": "3", - "size_in_bytes": "3539" - }, - "name": "folder_aaa.food_csv", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.food_csv", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - 
"lastUpdatedTimestamp": 1586833420000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests" - }, - "name": "tests" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": 
"urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration" - }, - "name": "integration" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - 
"lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3" - }, - "name": "s3" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - 
"aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", 
- "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data" - }, - "name": "test_data" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": 
"urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system" - }, - "name": "local_system" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": 
"containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a" - }, - "name": "folder_a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": 
"no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa" - }, - "name": "folder_aa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, 
-{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": 
"urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - 
"Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:98a716614da5246426edd48260406364" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 4, - "columnCount": 4, - "fieldProfiles": [ - { - "fieldPath": "name", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "apple", - "frequency": 1 - }, - { - "value": "cookie", - "frequency": 1 - }, - { - "value": "lasagna", - "frequency": 1 - }, - { - "value": "pasta", - "frequency": 1 - } - ], - "sampleValues": [ - "apple", - "cookie", - "lasagna", - "pasta" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "23", - "frequency": 1 - }, - { - "value": "49", - "frequency": 1 - }, - { - "value": "50", - "frequency": 1 - }, - { - "value": "72", - "frequency": 1 - } - ], - "sampleValues": [ - "23", - "49", - "50", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 1, - "uniqueProportion": 0.25, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 4 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4" - ] - }, - { - "fieldPath": "color", - 
"uniqueCount": 3, - "uniqueProportion": 0.75, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "brown", - "frequency": 1 - }, - { - "value": "red", - "frequency": 2 - }, - { - "value": "yellow", - "frequency": 1 - } - ], - "sampleValues": [ - "brown", - "red", - "red", - "yellow" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": 
"no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet", - "number_of_files": "2", - "size_in_bytes": "8412" - }, - "name": "folder_aaa.food_parquet", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.food_parquet", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "healthy", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - }, - 
{ - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833440000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - 
"json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 70, - "columnCount": 5, - "fieldProfiles": [ - { - "fieldPath": "name", - "uniqueCount": 9, - "uniqueProportion": 0.13043478260869565, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "distinctValueFrequencies": [ - { - "value": "NullValue", - "frequency": 1 - }, - { - "value": "apple", - "frequency": 7 - }, - { - "value": "chicken", - "frequency": 7 - }, - { - "value": "cookie", - "frequency": 6 - }, - { - "value": "hamburger", - "frequency": 7 - }, - { - "value": "lasagna", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 7 - }, - { - "value": "pasta", - "frequency": 7 - }, - { - "value": "spinach", - "frequency": 7 - }, - { - "value": "sushi", - "frequency": 7 - }, - { - "value": "water", - "frequency": 7 - } - ], - "sampleValues": [ - "apple", - "apple", - "apple", - "chicken", - "cookie", - "cookie", - "cookie", - "lasagna", - "lasagna", - "orange", - "orange", - "pasta", - "pasta", - "pasta", - "pasta", - "spinach", - "spinach", - "spinach", - "water", - "water" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 10, - "uniqueProportion": 0.14285714285714285, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "10", - "frequency": 7 - }, - { - "value": "2", - "frequency": 7 - }, - { - "value": "23", - "frequency": 7 - }, - { - "value": "32", - "frequency": 7 - }, - { - "value": "36", - "frequency": 7 - }, - { - "value": "43", - "frequency": 7 - }, - { - "value": "49", - "frequency": 7 - }, - { - "value": "50", - "frequency": 7 - }, - { - "value": "53", - "frequency": 7 - }, - { - "value": "72", - "frequency": 7 - } - ], - "sampleValues": [ - "10", - "10", - "10", - "23", - "23", - "23", - "32", - "32", - "36", - "43", - "43", - "49", - "49", - "50", - "50", - "50", - "72", - "72", - "72", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 
4, - "uniqueProportion": 0.05714285714285714, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 24 - }, - { - "value": "5", - "frequency": 15 - }, - { - "value": "6", - "frequency": 23 - }, - { - "value": "7", - "frequency": 8 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4", - "4", - "4", - "4", - "5", - "5", - "5", - "5", - "5", - "6", - "6", - "6", - "6", - "6", - "6", - "7", - "7" - ] - }, - { - "fieldPath": "color", - "uniqueCount": 7, - "uniqueProportion": 0.1, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "blue", - "frequency": 7 - }, - { - "value": "brown", - "frequency": 14 - }, - { - "value": "green", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 14 - }, - { - "value": "red", - "frequency": 14 - }, - { - "value": "white", - "frequency": 7 - }, - { - "value": "yellow", - "frequency": 7 - } - ], - "sampleValues": [ - "blue", - "blue", - "brown", - "brown", - "brown", - "green", - "green", - "green", - "orange", - "orange", - "red", - "red", - "red", - "red", - "red", - "white", - "yellow", - "yellow", - "yellow", - "yellow" - ] - }, - { - "fieldPath": "healthy", - "uniqueCount": 2, - "uniqueProportion": 0.028985507246376812, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "sampleValues": [ - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "None", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": 
"browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", - "number_of_files": "13", - "size_in_bytes": "188600" - }, - "name": "folder_aaa.pokemon_abilities_json", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.pokemon_abilities_json", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "effect_changes", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.is_native", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.name", - "nullable": false, - "type": { - "type": { 
- "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.url", - "nullable": false, - "type": { - "type": { - 
"com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.short_effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.flavor_text", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.url", - "nullable": false, - 
"type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "id", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "is_main_series", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - 
"fieldPath": "names.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.is_hidden", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.slot", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833590000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 1, - "columnCount": 9, - "fieldProfiles": [ - { - "fieldPath": "effect_changes", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(effect_entries=[Row(effect='Hat im Kampf keinen Effekt.', language=Row(is_native='no', name='de', url='https://pokeapi.co/api/v2/language/6/')), Row(effect='Has no effect in battle.', language=Row(is_native='yes', name='en', 
url='https://pokeapi.co/api/v2/language/9/'))], version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/'))]" - ] - }, - { - "fieldPath": "effect_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(effect='Attacken die Schaden verursachen haben mit jedem Treffer eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen, wenn die Attacke dies nicht bereits als Nebeneffekt hat.\\n\\nDer Effekt stapelt nicht mit dem von getragenen Items.\\n\\nAu\u00dferhalb vom Kampf: Wenn ein Pok\u00e9mon mit dieser F\u00e4higkeit an erster Stelle im Team steht, tauchen wilde Pok\u00e9mon nur halb so oft auf.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), short_effect='Mit jedem Treffer besteht eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen.'), Row(effect=\"This Pok\u00e9mon's damaging moves have a 10% chance to make the target flinch with each hit if they do not already cause flinching as a secondary effect.\\n\\nThis ability does not stack with a held item.\\n\\nOverworld: The wild encounter rate is halved while this Pok\u00e9mon is first in the party.\", language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), short_effect='Has a 10% chance of making target Pok\u00e9mon flinch with each hit.')]" - ] - }, - { - "fieldPath": "flavor_text_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ruby-sapphire', url='https://pokeapi.co/api/v2/version-group/5/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='emerald', url='https://pokeapi.co/api/v2/version-group/6/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', 
language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='firered-leafgreen', url='https://pokeapi.co/api/v2/version-group/7/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='diamond-pearl', url='https://pokeapi.co/api/v2/version-group/8/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='platinum', url='https://pokeapi.co/api/v2/version-group/9/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='heartgold-soulsilver', url='https://pokeapi.co/api/v2/version-group/10/')), Row(flavor_text='La puanteur peut\\neffrayer l\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-2-white-2', url='https://pokeapi.co/api/v2/version-group/14/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', 
url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', 
url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', 
language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', 
url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='ultra-sun-ultra-moon', 
url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), 
Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/'))]" - ] - }, - { - "fieldPath": "generation", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "Row(name='generation-iii', url='https://pokeapi.co/api/v2/generation/3/')" - ] - }, - { - "fieldPath": "id", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "1", - "frequency": 1 - } - ], - "sampleValues": [ - "1" - ] - }, - { - "fieldPath": "is_main_series", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "True" - ] - }, - { - "fieldPath": "name", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "stench", - "frequency": 1 - } - ], - "sampleValues": [ - "stench" - ] - }, - { - "fieldPath": "names", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), name='\uc545\ucde8'), Row(language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), name='\u60e1\u81ed'), Row(language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), name='Puanteur'), Row(language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), name='Duftnote'), Row(language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), name='Hedor'), Row(language=Row(name='it', 
url='https://pokeapi.co/api/v2/language/8/'), name='Tanfo'), Row(language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), name='Stench'), Row(language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), name='\u6076\u81ed')]" - ] - }, - { - "fieldPath": "pokemon", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(is_hidden=True, pokemon=Row(name='gloom', url='https://pokeapi.co/api/v2/pokemon/44/'), slot=3), Row(is_hidden=False, pokemon=Row(name='grimer', url='https://pokeapi.co/api/v2/pokemon/88/'), slot=1), Row(is_hidden=False, pokemon=Row(name='muk', url='https://pokeapi.co/api/v2/pokemon/89/'), slot=1), Row(is_hidden=False, pokemon=Row(name='stunky', url='https://pokeapi.co/api/v2/pokemon/434/'), slot=1), Row(is_hidden=False, pokemon=Row(name='skuntank', url='https://pokeapi.co/api/v2/pokemon/435/'), slot=1), Row(is_hidden=False, pokemon=Row(name='trubbish', url='https://pokeapi.co/api/v2/pokemon/568/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor', url='https://pokeapi.co/api/v2/pokemon/569/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor-gmax', url='https://pokeapi.co/api/v2/pokemon/10198/'), slot=1)]" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": 
"urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": 
"no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -} -] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json deleted file mode 100644 index 80f584788fdb2..0000000000000 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json +++ /dev/null @@ -1,2572 +0,0 @@ -[ -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv", - "number_of_files": "3", - "size_in_bytes": "3539" - }, - "name": "folder_aaa.food_csv", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", 
- "aspect": { - "json": { - "schemaName": "folder_aaa.food_csv", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833420000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": 
"no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests" - }, - "name": "tests" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration" - }, - "name": "integration" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - 
"container": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3" - }, - "name": "s3" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data" - }, - "name": "test_data" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - 
"urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system" - }, - "name": "local_system" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "subTypes", - 
"aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a" - }, - "name": "folder_a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - 
"runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": 
"urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa" - }, - "name": "folder_aa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, 
- "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - ] - } - }, - 
"systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": 
"urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:98a716614da5246426edd48260406364" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": 
"urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 4, - "columnCount": 4, - "fieldProfiles": [ - { - "fieldPath": "name", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "apple", - "frequency": 1 - }, - { - "value": "cookie", - "frequency": 1 - }, - { - "value": "lasagna", - "frequency": 1 - }, - { - "value": "pasta", - "frequency": 1 - } - ], - "sampleValues": [ - "apple", - "cookie", - "lasagna", - "pasta" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "23", - "frequency": 1 - }, - { - "value": "49", - "frequency": 1 - }, - { - "value": "50", - "frequency": 1 - }, - { - "value": "72", - "frequency": 1 - } - ], - "sampleValues": [ - "23", - "49", - "50", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 1, - "uniqueProportion": 0.25, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 4 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4" - ] - }, - { - "fieldPath": "color", - "uniqueCount": 3, - "uniqueProportion": 0.75, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "brown", - "frequency": 1 - }, - { - "value": "red", - "frequency": 2 - }, - { - "value": 
"yellow", - "frequency": 1 - } - ], - "sampleValues": [ - "brown", - "red", - "red", - "yellow" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": 
"UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet", - "number_of_files": "2", - "size_in_bytes": "8412" - }, - "name": "folder_aaa.food_parquet", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.food_parquet", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "healthy", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", 
- "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833440000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 70, - "columnCount": 5, - "fieldProfiles": [ - { - "fieldPath": "name", - "uniqueCount": 
9, - "uniqueProportion": 0.13043478260869565, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "distinctValueFrequencies": [ - { - "value": "NullValue", - "frequency": 1 - }, - { - "value": "apple", - "frequency": 7 - }, - { - "value": "chicken", - "frequency": 7 - }, - { - "value": "cookie", - "frequency": 6 - }, - { - "value": "hamburger", - "frequency": 7 - }, - { - "value": "lasagna", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 7 - }, - { - "value": "pasta", - "frequency": 7 - }, - { - "value": "spinach", - "frequency": 7 - }, - { - "value": "sushi", - "frequency": 7 - }, - { - "value": "water", - "frequency": 7 - } - ], - "sampleValues": [ - "apple", - "apple", - "apple", - "chicken", - "cookie", - "cookie", - "cookie", - "lasagna", - "lasagna", - "orange", - "orange", - "pasta", - "pasta", - "pasta", - "pasta", - "spinach", - "spinach", - "spinach", - "water", - "water" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 10, - "uniqueProportion": 0.14285714285714285, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "10", - "frequency": 7 - }, - { - "value": "2", - "frequency": 7 - }, - { - "value": "23", - "frequency": 7 - }, - { - "value": "32", - "frequency": 7 - }, - { - "value": "36", - "frequency": 7 - }, - { - "value": "43", - "frequency": 7 - }, - { - "value": "49", - "frequency": 7 - }, - { - "value": "50", - "frequency": 7 - }, - { - "value": "53", - "frequency": 7 - }, - { - "value": "72", - "frequency": 7 - } - ], - "sampleValues": [ - "10", - "10", - "10", - "23", - "23", - "23", - "32", - "32", - "36", - "43", - "43", - "49", - "49", - "50", - "50", - "50", - "72", - "72", - "72", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 4, - "uniqueProportion": 0.05714285714285714, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 24 - }, - { - "value": "5", - "frequency": 15 - }, - { - "value": "6", - 
"frequency": 23 - }, - { - "value": "7", - "frequency": 8 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4", - "4", - "4", - "4", - "5", - "5", - "5", - "5", - "5", - "6", - "6", - "6", - "6", - "6", - "6", - "7", - "7" - ] - }, - { - "fieldPath": "color", - "uniqueCount": 7, - "uniqueProportion": 0.1, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "blue", - "frequency": 7 - }, - { - "value": "brown", - "frequency": 14 - }, - { - "value": "green", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 14 - }, - { - "value": "red", - "frequency": 14 - }, - { - "value": "white", - "frequency": 7 - }, - { - "value": "yellow", - "frequency": 7 - } - ], - "sampleValues": [ - "blue", - "blue", - "brown", - "brown", - "brown", - "green", - "green", - "green", - "orange", - "orange", - "red", - "red", - "red", - "red", - "red", - "white", - "yellow", - "yellow", - "yellow", - "yellow" - ] - }, - { - "fieldPath": "healthy", - "uniqueCount": 2, - "uniqueProportion": 0.028985507246376812, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "sampleValues": [ - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "None", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": 
"urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", - "number_of_files": "13", - "size_in_bytes": "188600" - }, - "name": "folder_aaa.pokemon_abilities_json", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.pokemon_abilities_json", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "effect_changes", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.is_native", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": 
"effect_changes.effect_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": 
"effect_entries.short_effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.flavor_text", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - 
{ - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "id", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "is_main_series", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - 
"recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.is_hidden", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.slot", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - 
"partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833590000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 1, - "columnCount": 9, - "fieldProfiles": [ - { - "fieldPath": "effect_changes", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(effect_entries=[Row(effect='Hat im Kampf keinen Effekt.', language=Row(is_native='no', name='de', url='https://pokeapi.co/api/v2/language/6/')), Row(effect='Has no effect in battle.', language=Row(is_native='yes', name='en', url='https://pokeapi.co/api/v2/language/9/'))], version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/'))]" - ] - }, - { - "fieldPath": "effect_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - 
"[Row(effect='Attacken die Schaden verursachen haben mit jedem Treffer eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen, wenn die Attacke dies nicht bereits als Nebeneffekt hat.\\n\\nDer Effekt stapelt nicht mit dem von getragenen Items.\\n\\nAu\u00dferhalb vom Kampf: Wenn ein Pok\u00e9mon mit dieser F\u00e4higkeit an erster Stelle im Team steht, tauchen wilde Pok\u00e9mon nur halb so oft auf.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), short_effect='Mit jedem Treffer besteht eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen.'), Row(effect=\"This Pok\u00e9mon's damaging moves have a 10% chance to make the target flinch with each hit if they do not already cause flinching as a secondary effect.\\n\\nThis ability does not stack with a held item.\\n\\nOverworld: The wild encounter rate is halved while this Pok\u00e9mon is first in the party.\", language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), short_effect='Has a 10% chance of making target Pok\u00e9mon flinch with each hit.')]" - ] - }, - { - "fieldPath": "flavor_text_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ruby-sapphire', url='https://pokeapi.co/api/v2/version-group/5/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='emerald', url='https://pokeapi.co/api/v2/version-group/6/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='firered-leafgreen', url='https://pokeapi.co/api/v2/version-group/7/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), 
version_group=Row(name='diamond-pearl', url='https://pokeapi.co/api/v2/version-group/8/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='platinum', url='https://pokeapi.co/api/v2/version-group/9/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='heartgold-soulsilver', url='https://pokeapi.co/api/v2/version-group/10/')), Row(flavor_text='La puanteur peut\\neffrayer l\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-2-white-2', url='https://pokeapi.co/api/v2/version-group/14/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='x-y', 
url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), 
version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 
\uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), 
Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut 
effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), 
version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/'))]" - ] - }, - { - "fieldPath": "generation", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "Row(name='generation-iii', url='https://pokeapi.co/api/v2/generation/3/')" - ] - }, - { - "fieldPath": "id", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "1", - "frequency": 1 - } - ], - "sampleValues": [ - "1" - ] - }, - { - "fieldPath": "is_main_series", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "True" - ] - }, - { - "fieldPath": "name", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "stench", - "frequency": 1 - } - ], - "sampleValues": [ - "stench" - ] - }, - { - "fieldPath": "names", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), name='\uc545\ucde8'), Row(language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), name='\u60e1\u81ed'), Row(language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), name='Puanteur'), Row(language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), name='Duftnote'), Row(language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), name='Hedor'), Row(language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), name='Tanfo'), Row(language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), name='Stench'), Row(language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), name='\u3042\u304f\u3057\u3085\u3046'), 
Row(language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), name='\u6076\u81ed')]" - ] - }, - { - "fieldPath": "pokemon", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(is_hidden=True, pokemon=Row(name='gloom', url='https://pokeapi.co/api/v2/pokemon/44/'), slot=3), Row(is_hidden=False, pokemon=Row(name='grimer', url='https://pokeapi.co/api/v2/pokemon/88/'), slot=1), Row(is_hidden=False, pokemon=Row(name='muk', url='https://pokeapi.co/api/v2/pokemon/89/'), slot=1), Row(is_hidden=False, pokemon=Row(name='stunky', url='https://pokeapi.co/api/v2/pokemon/434/'), slot=1), Row(is_hidden=False, pokemon=Row(name='skuntank', url='https://pokeapi.co/api/v2/pokemon/435/'), slot=1), Row(is_hidden=False, pokemon=Row(name='trubbish', url='https://pokeapi.co/api/v2/pokemon/568/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor', url='https://pokeapi.co/api/v2/pokemon/569/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor-gmax', url='https://pokeapi.co/api/v2/pokemon/10198/'), slot=1)]" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": 
"urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - 
"aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -} -] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json deleted file mode 100644 index a384a8f1e501d..0000000000000 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json +++ /dev/null @@ -1,2572 +0,0 @@ -[ -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv", - "number_of_files": "3", - "size_in_bytes": "3539" - }, - "name": "folder_aaa.food_csv", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.food_csv", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": 
"urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833420000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - 
"platform": "file", - "env": "UAT", - "folder_abs_path": "tests" - }, - "name": "tests" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - 
"aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration" - }, - "name": "integration" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": 
"no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3" - }, - "name": "s3" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": 
"subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data" - }, - "name": "test_data" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - 
"aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": 
"urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system" - }, - "name": "local_system" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a" - }, - "name": "folder_a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, 
-{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": 
"urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa" - }, - "name": "folder_aa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - 
"runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - 
"changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:98a716614da5246426edd48260406364" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - 
"lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 4, - "columnCount": 4, - "fieldProfiles": [ - { - "fieldPath": "name", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "apple", - "frequency": 1 - }, - { - "value": "cookie", - "frequency": 1 - }, - { - "value": "lasagna", - "frequency": 1 - }, - { - "value": "pasta", - "frequency": 1 - } - ], - "sampleValues": [ - "apple", - "cookie", - "lasagna", - "pasta" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "23", - "frequency": 1 - }, - { - "value": "49", - "frequency": 1 - }, - { - "value": "50", - "frequency": 1 - }, - { - "value": "72", - "frequency": 1 - } - ], - "sampleValues": [ - "23", - "49", - "50", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 1, - "uniqueProportion": 0.25, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 4 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4" - ] - }, - { - "fieldPath": "color", - "uniqueCount": 3, - "uniqueProportion": 0.75, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "brown", - "frequency": 1 - }, - { - "value": "red", - "frequency": 2 - }, - { - "value": "yellow", - "frequency": 1 - } - ], - "sampleValues": [ - "brown", - "red", - "red", - 
"yellow" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - 
"customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet", - "number_of_files": "2", - "size_in_bytes": "8412" - }, - "name": "folder_aaa.food_parquet", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.food_parquet", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "healthy", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", - "nullable": false, - "type": { - "type": { - 
"com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833440000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 70, - "columnCount": 5, - "fieldProfiles": [ - { - "fieldPath": "name", - "uniqueCount": 9, - "uniqueProportion": 
0.13043478260869565, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "distinctValueFrequencies": [ - { - "value": "NullValue", - "frequency": 1 - }, - { - "value": "apple", - "frequency": 7 - }, - { - "value": "chicken", - "frequency": 7 - }, - { - "value": "cookie", - "frequency": 6 - }, - { - "value": "hamburger", - "frequency": 7 - }, - { - "value": "lasagna", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 7 - }, - { - "value": "pasta", - "frequency": 7 - }, - { - "value": "spinach", - "frequency": 7 - }, - { - "value": "sushi", - "frequency": 7 - }, - { - "value": "water", - "frequency": 7 - } - ], - "sampleValues": [ - "apple", - "apple", - "apple", - "chicken", - "cookie", - "cookie", - "cookie", - "lasagna", - "lasagna", - "orange", - "orange", - "pasta", - "pasta", - "pasta", - "pasta", - "spinach", - "spinach", - "spinach", - "water", - "water" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 10, - "uniqueProportion": 0.14285714285714285, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "10", - "frequency": 7 - }, - { - "value": "2", - "frequency": 7 - }, - { - "value": "23", - "frequency": 7 - }, - { - "value": "32", - "frequency": 7 - }, - { - "value": "36", - "frequency": 7 - }, - { - "value": "43", - "frequency": 7 - }, - { - "value": "49", - "frequency": 7 - }, - { - "value": "50", - "frequency": 7 - }, - { - "value": "53", - "frequency": 7 - }, - { - "value": "72", - "frequency": 7 - } - ], - "sampleValues": [ - "10", - "10", - "10", - "23", - "23", - "23", - "32", - "32", - "36", - "43", - "43", - "49", - "49", - "50", - "50", - "50", - "72", - "72", - "72", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 4, - "uniqueProportion": 0.05714285714285714, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 24 - }, - { - "value": "5", - "frequency": 15 - }, - { - "value": "6", - "frequency": 23 - }, - { - 
"value": "7", - "frequency": 8 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4", - "4", - "4", - "4", - "5", - "5", - "5", - "5", - "5", - "6", - "6", - "6", - "6", - "6", - "6", - "7", - "7" - ] - }, - { - "fieldPath": "color", - "uniqueCount": 7, - "uniqueProportion": 0.1, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "blue", - "frequency": 7 - }, - { - "value": "brown", - "frequency": 14 - }, - { - "value": "green", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 14 - }, - { - "value": "red", - "frequency": 14 - }, - { - "value": "white", - "frequency": 7 - }, - { - "value": "yellow", - "frequency": 7 - } - ], - "sampleValues": [ - "blue", - "blue", - "brown", - "brown", - "brown", - "green", - "green", - "green", - "orange", - "orange", - "red", - "red", - "red", - "red", - "red", - "white", - "yellow", - "yellow", - "yellow", - "yellow" - ] - }, - { - "fieldPath": "healthy", - "uniqueCount": 2, - "uniqueProportion": 0.028985507246376812, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "sampleValues": [ - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "None", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": 
"urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", - "number_of_files": "13", - "size_in_bytes": "188600" - }, - "name": "folder_aaa.pokemon_abilities_json", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.pokemon_abilities_json", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "effect_changes", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.is_native", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": 
"effect_changes.effect_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": 
"effect_entries.short_effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.flavor_text", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - 
{ - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "id", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "is_main_series", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - 
"recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.is_hidden", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.slot", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, 
- "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833590000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 1, - "columnCount": 9, - "fieldProfiles": [ - { - "fieldPath": "effect_changes", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(effect_entries=[Row(effect='Hat im Kampf keinen Effekt.', language=Row(is_native='no', name='de', url='https://pokeapi.co/api/v2/language/6/')), Row(effect='Has no effect in battle.', language=Row(is_native='yes', name='en', url='https://pokeapi.co/api/v2/language/9/'))], version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/'))]" - ] - }, - { - "fieldPath": "effect_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - 
"sampleValues": [ - "[Row(effect='Attacken die Schaden verursachen haben mit jedem Treffer eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen, wenn die Attacke dies nicht bereits als Nebeneffekt hat.\\n\\nDer Effekt stapelt nicht mit dem von getragenen Items.\\n\\nAu\u00dferhalb vom Kampf: Wenn ein Pok\u00e9mon mit dieser F\u00e4higkeit an erster Stelle im Team steht, tauchen wilde Pok\u00e9mon nur halb so oft auf.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), short_effect='Mit jedem Treffer besteht eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen.'), Row(effect=\"This Pok\u00e9mon's damaging moves have a 10% chance to make the target flinch with each hit if they do not already cause flinching as a secondary effect.\\n\\nThis ability does not stack with a held item.\\n\\nOverworld: The wild encounter rate is halved while this Pok\u00e9mon is first in the party.\", language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), short_effect='Has a 10% chance of making target Pok\u00e9mon flinch with each hit.')]" - ] - }, - { - "fieldPath": "flavor_text_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ruby-sapphire', url='https://pokeapi.co/api/v2/version-group/5/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='emerald', url='https://pokeapi.co/api/v2/version-group/6/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='firered-leafgreen', url='https://pokeapi.co/api/v2/version-group/7/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', 
url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='diamond-pearl', url='https://pokeapi.co/api/v2/version-group/8/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='platinum', url='https://pokeapi.co/api/v2/version-group/9/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='heartgold-soulsilver', url='https://pokeapi.co/api/v2/version-group/10/')), Row(flavor_text='La puanteur peut\\neffrayer l\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-2-white-2', url='https://pokeapi.co/api/v2/version-group/14/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), 
version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', 
url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c 
\uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), 
Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut 
effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), 
version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/'))]" - ] - }, - { - "fieldPath": "generation", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "Row(name='generation-iii', url='https://pokeapi.co/api/v2/generation/3/')" - ] - }, - { - "fieldPath": "id", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "1", - "frequency": 1 - } - ], - "sampleValues": [ - "1" - ] - }, - { - "fieldPath": "is_main_series", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "True" - ] - }, - { - "fieldPath": "name", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "stench", - "frequency": 1 - } - ], - "sampleValues": [ - "stench" - ] - }, - { - "fieldPath": "names", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), name='\uc545\ucde8'), Row(language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), name='\u60e1\u81ed'), Row(language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), name='Puanteur'), Row(language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), name='Duftnote'), Row(language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), name='Hedor'), Row(language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), name='Tanfo'), Row(language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), name='Stench'), Row(language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), name='\u3042\u304f\u3057\u3085\u3046'), 
Row(language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), name='\u6076\u81ed')]" - ] - }, - { - "fieldPath": "pokemon", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(is_hidden=True, pokemon=Row(name='gloom', url='https://pokeapi.co/api/v2/pokemon/44/'), slot=3), Row(is_hidden=False, pokemon=Row(name='grimer', url='https://pokeapi.co/api/v2/pokemon/88/'), slot=1), Row(is_hidden=False, pokemon=Row(name='muk', url='https://pokeapi.co/api/v2/pokemon/89/'), slot=1), Row(is_hidden=False, pokemon=Row(name='stunky', url='https://pokeapi.co/api/v2/pokemon/434/'), slot=1), Row(is_hidden=False, pokemon=Row(name='skuntank', url='https://pokeapi.co/api/v2/pokemon/435/'), slot=1), Row(is_hidden=False, pokemon=Row(name='trubbish', url='https://pokeapi.co/api/v2/pokemon/568/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor', url='https://pokeapi.co/api/v2/pokemon/569/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor-gmax', url='https://pokeapi.co/api/v2/pokemon/10198/'), slot=1)]" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": 
"urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": 
"status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -} -] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json index 4f98d68f8ae62..6ae2ec160035e 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json @@ -458,7 +458,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444889021 + "lastUpdatedTimestamp": 1586808010000 } }, "systemMetadata": { @@ -2896,38 +2896,38 @@ "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FourthChord", + "nativeDataType": "SecondChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "SecondChord", + "nativeDataType": "ThirdChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "ThirdChord", + "nativeDataType": "FourthChord", "recursive": false, "isPartOfKey": false }, @@ -2965,7 +2965,7 @@ "type": "FULL_TABLE" }, "operationType": 
"UPDATE", - "lastUpdatedTimestamp": 1688444899021 + "lastUpdatedTimestamp": 1586808020000 } }, "systemMetadata": { @@ -3541,7 +3541,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444909021 + "lastUpdatedTimestamp": 1586808030000 } }, "systemMetadata": { @@ -3974,17 +3974,11 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json", - "number_of_files": "1", - "size_in_bytes": "4646" - }, - "name": "countries_json.json", - "description": "", - "tags": [] + "platform": "urn:li:dataPlatform:file", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" } }, "systemMetadata": { @@ -3997,11 +3991,17 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "datasetProperties", "aspect": { "json": { - "platform": "urn:li:dataPlatform:file", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + "customProperties": { + "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json", + "number_of_files": "1", + "size_in_bytes": "4646" + }, + "name": "countries_json.json", + "description": "", + "tags": [] } }, "systemMetadata": { @@ -4036,14 +4036,14 @@ }, "fields": [ { - "fieldPath": "countries", + "fieldPath": "countries.name", "nullable": false, "type": { "type": { 
- "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, @@ -4060,14 +4060,14 @@ "isPartOfKey": false }, { - "fieldPath": "countries.name", + "fieldPath": "countries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false } @@ -4093,7 +4093,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444919021 + "lastUpdatedTimestamp": 1586808040000 } }, "systemMetadata": { @@ -4205,6 +4205,23 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", @@ -4254,7 +4271,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -4266,14 +4283,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ 
-4290,7 +4307,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -4302,14 +4319,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -4335,24 +4352,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444929021 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + "lastUpdatedTimestamp": 1586808050000 } }, "systemMetadata": { @@ -4739,17 +4739,11 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv", - "number_of_files": "1", - "size_in_bytes": "172" - }, - "name": "small.csv", - "description": "", - "tags": [] + "platform": "urn:li:dataPlatform:file", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" } }, "systemMetadata": { @@ -4762,11 +4756,17 @@ 
"entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "datasetProperties", "aspect": { "json": { - "platform": "urn:li:dataPlatform:file", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + "customProperties": { + "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv", + "number_of_files": "1", + "size_in_bytes": "172" + }, + "name": "small.csv", + "description": "", + "tags": [] } }, "systemMetadata": { @@ -4882,7 +4882,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444939021 + "lastUpdatedTimestamp": 1586808060000 } }, "systemMetadata": { @@ -5124,6 +5124,23 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", @@ -5566,7 +5583,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444949021 + "lastUpdatedTimestamp": 1586808070000 } }, "systemMetadata": { @@ -7807,13 +7824,12 @@ 
}, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:file", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + "removed": false } }, "systemMetadata": { @@ -7824,7 +7840,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7840,7 +7856,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7856,7 +7872,7 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7872,7 +7888,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7888,7 +7904,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7904,7 +7920,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7918,15 +7934,153 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808010000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808020000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808030000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808040000 + } + 
} + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808050000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808060000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", - "changeType": "UPSERT", - "aspectName": "status", + "changeType": "PATCH", + "aspectName": "datasetProperties", "aspect": { - "json": { - "removed": false - } + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808070000 + } + } + ] }, "systemMetadata": { "lastObserved": 1615443388097, diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_spec_for_files.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_spec_for_files.json index 491cbdf8b9704..9bb8412b64f91 
100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_spec_for_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_spec_for_files.json @@ -60,38 +60,38 @@ "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FourthChord", + "nativeDataType": "SecondChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "SecondChord", + "nativeDataType": "ThirdChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "ThirdChord", + "nativeDataType": "FourthChord", "recursive": false, "isPartOfKey": false }, @@ -129,7 +129,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444899021 + "lastUpdatedTimestamp": 1586808020000 } }, "systemMetadata": { @@ -1619,7 +1619,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444909021 + "lastUpdatedTimestamp": 1586808030000 } }, "systemMetadata": { @@ -2075,5 +2075,49 @@ "runId": "multiple_spec_for_files.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": 
{ + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808020000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808030000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_specs_of_different_buckets.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_specs_of_different_buckets.json index da7c76876d415..fea1929b98ab5 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_specs_of_different_buckets.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_specs_of_different_buckets.json @@ -60,38 +60,38 @@ "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FourthChord", + "nativeDataType": "SecondChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "SecondChord", + "nativeDataType": "ThirdChord", 
"recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "ThirdChord", + "nativeDataType": "FourthChord", "recursive": false, "isPartOfKey": false }, @@ -129,7 +129,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444899021 + "lastUpdatedTimestamp": 1586808020000 } }, "systemMetadata": { @@ -1619,7 +1619,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444909021 + "lastUpdatedTimestamp": 1586808030000 } }, "systemMetadata": { @@ -2075,5 +2075,49 @@ "runId": "multiple_specs_of_different_buckets.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808020000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808030000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git 
a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_single_file.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_single_file.json index 76e562142e39e..a31a721fbbadd 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_single_file.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_single_file.json @@ -60,38 +60,38 @@ "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FourthChord", + "nativeDataType": "SecondChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "SecondChord", + "nativeDataType": "ThirdChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "ThirdChord", + "nativeDataType": "FourthChord", "recursive": false, "isPartOfKey": false }, @@ -129,7 +129,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444899021 + "lastUpdatedTimestamp": 1586808020000 } }, "systemMetadata": { @@ -1504,5 +1504,27 @@ "runId": "single_file.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + 
"aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808020000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_inference_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_inference_without_extension.json index abc6eb1b471b2..63888d6bc4351 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_inference_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_inference_without_extension.json @@ -174,7 +174,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_inference_without_extension.json", + "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -269,7 +269,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_inference_without_extension.json", + "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -384,7 +384,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_inference_without_extension.json", + "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -503,7 +503,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_inference_without_extension.json", + "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -626,7 +626,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_inference_without_extension.json", + "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -806,5 +806,27 @@ "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/no_extension/small,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847850000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "file_inference_without_extension.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json index 1c022fabf9158..8087ea591beef 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json @@ -806,5 +806,27 @@ "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/no_extension/small,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847850000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json index 41484bec81935..64c1505414ff8 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json +++ 
b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json @@ -817,5 +817,49 @@ "runId": "folder_no_partition.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847820000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json index 0b28381fce8ff..f86c652462fd4 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json @@ -627,5 +627,27 @@ "runId": "folder_no_partition_exclude.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": 
"add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json index c4cfed8bfc7ac..2575db41ca8b7 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json @@ -817,5 +817,49 @@ "runId": "folder_no_partition_filename.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847820000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json 
b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json index ae81f60ac8dcc..272beb57e85e1 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json @@ -627,5 +627,27 @@ "runId": "folder_no_partition_glob.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json index 684af901e6832..21623e2216565 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json @@ -1483,5 +1483,49 @@ "runId": "folder_partition_update_schema_with_partition_autodetect.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 
1586847820000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json index 4d23cadbbc4d3..154bce421e18a 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json @@ -1483,5 +1483,49 @@ "runId": "folder_partition_with_partition_autodetect_traverse_all.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847820000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json index 6017a27a88895..f483f806e6193 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json @@ -1483,5 +1483,49 @@ "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847820000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { 
+ "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json index 90a361219c1bf..38ce5188e0a8e 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json @@ -2625,5 +2625,159 @@ "runId": "multiple_files.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847610000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847620000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847630000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847670000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847660000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847640000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847650000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json index c67977ef7fa1b..7f657cb69180a 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json @@ -911,5 +911,49 @@ "runId": "multiple_spec_for_files.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847620000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847630000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": 
"multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json index d96bdce912082..6e2e966f1f7b4 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json @@ -1338,5 +1338,49 @@ "runId": "multiple_specs_of_different_buckets.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847620000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket-2/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847630000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json index 7703d137ddd29..be3d2efed088e 100644 
--- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json @@ -684,5 +684,27 @@ "runId": "single_file.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847620000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/test_s3.py b/metadata-ingestion/tests/integration/s3/test_s3.py index 54156610c6872..0e73cdca006bd 100644 --- a/metadata-ingestion/tests/integration/s3/test_s3.py +++ b/metadata-ingestion/tests/integration/s3/test_s3.py @@ -242,6 +242,7 @@ def test_data_lake_local_ingest( golden_path=f"{test_resources_dir}/golden-files/local/golden_mces_{source_file}", ignore_paths=[ r"root\[\d+\]\['aspect'\]\['json'\]\['lastUpdatedTimestamp'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\[\d+\]\['value'\]\['time'\]", r"root\[\d+\]\['proposedSnapshot'\].+\['aspects'\].+\['created'\]\['time'\]", # root[41]['aspect']['json']['fieldProfiles'][0]['sampleValues'][0] r"root\[\d+\]\['aspect'\]\['json'\]\['fieldProfiles'\]\[\d+\]\['sampleValues'\]", diff --git a/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json b/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json index 6a3ce983950b0..82c760458ca14 100644 --- a/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json +++ b/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json @@ -1514,8 +1514,8 @@ "json": { "timestampMillis": 
1652353200000, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:user@mydomain.com", "operationType": "CREATE", @@ -1537,8 +1537,8 @@ "json": { "timestampMillis": 1652353200000, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:user@mydomain.com", "operationType": "ALTER", @@ -2023,8 +2023,8 @@ "json": { "timestampMillis": 1652353200000, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "rowCount": 3, "columnCount": 15 @@ -2099,5 +2099,27 @@ "runId": "salesforce-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:salesforce,Property__c,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1652784043000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1652353200000, + "runId": "salesforce-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py b/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py index d995404ad69a5..97f65f1bd6a5b 100644 --- a/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py +++ b/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py @@ -14,6 +14,9 @@ make_dataset_urn, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.auto_work_units.auto_dataset_properties_aspect import ( + auto_patch_last_modified, +) from datahub.ingestion.api.source_helpers import ( _prepend_platform_instance, auto_browse_path_v2, @@ -21,8 +24,15 @@ 
auto_lowercase_urns, auto_status_aspect, auto_workunit, + create_dataset_props_patch_builder, ) from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.metadata.schema_classes import ( + DatasetPropertiesClass, + OperationTypeClass, + TimeStampClass, +) +from datahub.specific.dataset import DatasetPatchBuilder _base_metadata: List[ Union[MetadataChangeProposalWrapper, models.MetadataChangeEventClass] @@ -146,7 +156,6 @@ def _make_browse_path_entries(path: List[str]) -> List[models.BrowsePathEntryCla def prepend_platform_instance( path: List[models.BrowsePathEntryClass], ) -> List[models.BrowsePathEntryClass]: - platform = "platform" instance = "instance" return _prepend_platform_instance(path, platform, instance) @@ -656,3 +665,150 @@ def test_auto_empty_dataset_usage_statistics_invalid_timestamp( changeType=models.ChangeTypeClass.CREATE, ).as_workunit(), ] + + +def get_sample_mcps(mcps_to_append: List = []) -> List[MetadataChangeProposalWrapper]: + mcps = [ + MetadataChangeProposalWrapper( + entityUrn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)", + aspect=models.OperationClass( + timestampMillis=10, + lastUpdatedTimestamp=12, + operationType=OperationTypeClass.CREATE, + ), + ), + MetadataChangeProposalWrapper( + entityUrn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)", + aspect=models.OperationClass( + timestampMillis=11, + lastUpdatedTimestamp=20, + operationType=OperationTypeClass.CREATE, + ), + ), + ] + mcps.extend(mcps_to_append) + return mcps + + +def to_patch_work_units(patch_builder: DatasetPatchBuilder) -> List[MetadataWorkUnit]: + return [ + MetadataWorkUnit( + id=MetadataWorkUnit.generate_workunit_id(patch_mcp), mcp_raw=patch_mcp + ) + for patch_mcp in patch_builder.build() + ] + + +def get_auto_generated_wu() -> List[MetadataWorkUnit]: + dataset_patch_builder = DatasetPatchBuilder( + urn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)" + ).set_last_modified(TimeStampClass(time=20)) + + 
auto_generated_work_units = to_patch_work_units(dataset_patch_builder) + + return auto_generated_work_units + + +@freeze_time("2023-01-02 00:00:00") +def test_auto_patch_last_modified_no_change(): + mcps = [ + MetadataChangeProposalWrapper( + entityUrn="urn:li:container:008e111aa1d250dd52e0fd5d4b307b1a", + aspect=models.StatusClass(removed=False), + ) + ] + + initial_wu = list(auto_workunit(mcps)) + + expected = initial_wu + + assert ( + list(auto_patch_last_modified(initial_wu)) == expected + ) # There should be no change + + +@freeze_time("2023-01-02 00:00:00") +def test_auto_patch_last_modified_max_last_updated_timestamp(): + mcps = get_sample_mcps() + + expected = list(auto_workunit(mcps)) + + auto_generated_work_units = get_auto_generated_wu() + + expected.extend(auto_generated_work_units) + + # work unit should contain a path of datasetProperties with lastModified set to max of operation.lastUpdatedTime + # i.e., 20 + assert list(auto_patch_last_modified(auto_workunit(mcps))) == expected + + +@freeze_time("2023-01-02 00:00:00") +def test_auto_patch_last_modified_multi_patch(): + mcps = get_sample_mcps() + + dataset_patch_builder = DatasetPatchBuilder( + urn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)" + ) + + dataset_patch_builder.set_display_name("foo") + dataset_patch_builder.set_description("it is fake") + + patch_work_units = to_patch_work_units(dataset_patch_builder) + + work_units = [*list(auto_workunit(mcps)), *patch_work_units] + + auto_generated_work_units = get_auto_generated_wu() + + expected = [*work_units, *auto_generated_work_units] + + # In this case, the final work units include two patch units: one originating from the source and + # the other from auto_patch_last_modified. 
+ assert list(auto_patch_last_modified(work_units)) == expected + + +@freeze_time("2023-01-02 00:00:00") +def test_auto_patch_last_modified_last_modified_patch_exist(): + mcps = get_sample_mcps() + + patch_builder = create_dataset_props_patch_builder( + dataset_urn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)", + dataset_properties=DatasetPropertiesClass( + name="foo", + description="dataset for collection of foo", + lastModified=TimeStampClass(time=20), + ), + ) + + work_units = [ + *list(auto_workunit(mcps)), + *to_patch_work_units(patch_builder), + ] + # The input and output should align since the source is generating a patch for datasetProperties with the + # lastModified attribute. + # Therefore, `auto_patch_last_modified` should not create any additional patch. + assert list(auto_patch_last_modified(work_units)) == work_units + + +@freeze_time("2023-01-02 00:00:00") +def test_auto_patch_last_modified_last_modified_patch_not_exist(): + mcps = get_sample_mcps() + + patch_builder = create_dataset_props_patch_builder( + dataset_urn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)", + dataset_properties=DatasetPropertiesClass( + name="foo", + description="dataset for collection of foo", + ), + ) + + work_units = [ + *list(auto_workunit(mcps)), + *to_patch_work_units(patch_builder), + ] + + expected = [ + *work_units, + *get_auto_generated_wu(), # The output should include an additional patch for the `lastModified` attribute. 
+ ] + + assert list(auto_patch_last_modified(work_units)) == expected diff --git a/metadata-ingestion/tests/unit/test_dbt_source.py b/metadata-ingestion/tests/unit/test_dbt_source.py index 90ff78b16f652..7d01ecd034523 100644 --- a/metadata-ingestion/tests/unit/test_dbt_source.py +++ b/metadata-ingestion/tests/unit/test_dbt_source.py @@ -1,3 +1,4 @@ +import doctest from datetime import timedelta from typing import Dict, List, Union from unittest import mock @@ -7,10 +8,8 @@ from datahub.emitter import mce_builder from datahub.ingestion.api.common import PipelineContext -from datahub.ingestion.source.dbt.dbt_cloud import ( - DBTCloudConfig, - infer_metadata_endpoint, -) +from datahub.ingestion.source.dbt import dbt_cloud +from datahub.ingestion.source.dbt.dbt_cloud import DBTCloudConfig from datahub.ingestion.source.dbt.dbt_core import ( DBTCoreConfig, DBTCoreSource, @@ -401,17 +400,7 @@ def test_dbt_cloud_config_with_defined_metadata_endpoint(): def test_infer_metadata_endpoint() -> None: - assert ( - infer_metadata_endpoint("https://cloud.getdbt.com") - == "https://metadata.cloud.getdbt.com/graphql" - ) - assert ( - infer_metadata_endpoint("https://prefix.us1.dbt.com") - == "https://prefix.metadata.us1.dbt.com/graphql" - ) - assert ( - infer_metadata_endpoint("http://dbt.corp.internal") - ) == "http://metadata.dbt.corp.internal/graphql" + assert doctest.testmod(dbt_cloud, raise_on_error=True).attempted > 0 def test_dbt_time_parsing() -> None: diff --git a/metadata-integration/java/datahub-protobuf-example/build.gradle b/metadata-integration/java/datahub-protobuf-example/build.gradle index 1efb43360457a..3dea6554bd264 100644 --- a/metadata-integration/java/datahub-protobuf-example/build.gradle +++ b/metadata-integration/java/datahub-protobuf-example/build.gradle @@ -13,8 +13,8 @@ repositories { } ext { - protobuf_version = '3.19.3' - datahub_protobuf_version = '0.8.45-SNAPSHOT' + protobuf_version = '3.25.5' + datahub_protobuf_version = '0.14.1' } configurations { @@ 
-66,13 +66,12 @@ task publishSchema(dependsOn: build) { fileTree("schema").matching { exclude "protobuf/meta/**" - }.each {f -> + }.each { f -> doLast { javaexec { - executable = javaLauncher.get().getExecutablePath().getAsFile().getAbsolutePath() classpath = configurations.datahub - main = "datahub.protobuf.Proto2DataHub" - args = ["--descriptor", "${projectDir}/build/descriptors/main.dsc", "--file", file(f).getAbsoluteFile()] + mainClass = "datahub.protobuf.Proto2DataHub" + args = ["--descriptor", "${projectDir}/build/descriptors/main.dsc", "--file", file(f).absolutePath] } } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/JavaGraphClient.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/JavaGraphClient.java index c54ba4a222b73..12c59324e3f7c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/JavaGraphClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/JavaGraphClient.java @@ -9,6 +9,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.search.utils.QueryUtils; +import io.datahubproject.metadata.context.OperationContext; import java.net.URISyntaxException; import java.util.List; import java.util.stream.Collectors; @@ -19,10 +20,13 @@ @Slf4j public class JavaGraphClient implements GraphClient { - GraphService _graphService; + private final OperationContext systemOpContext; + private final GraphService graphService; - public JavaGraphClient(@Nonnull GraphService graphService) { - this._graphService = graphService; + public JavaGraphClient( + @Nonnull OperationContext systemOpContext, @Nonnull GraphService graphService) { + this.systemOpContext = systemOpContext; + this.graphService = graphService; } /** @@ -43,7 +47,8 @@ public EntityRelationships getRelatedEntities( count = count == null ? 
DEFAULT_PAGE_SIZE : count; RelatedEntitiesResult relatedEntitiesResult = - _graphService.findRelatedEntities( + graphService.findRelatedEntities( + systemOpContext, null, QueryUtils.newFilter("urn", rawUrn), null, @@ -91,7 +96,8 @@ public EntityLineageResult getLineageEntities( @Nullable Integer count, int maxHops, String actor) { - return _graphService.getLineage( + return graphService.getLineage( + systemOpContext, UrnUtils.getUrn(rawUrn), direction, start != null ? start : 0, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java index 0dff287080842..f9287ab34cf19 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java @@ -64,13 +64,7 @@ public EntityLineageResult getLineage( if (separateSiblings) { return ValidationUtils.validateEntityLineageResult( opContext, - _graphService.getLineage( - entityUrn, - direction, - offset, - count, - maxHops, - opContext.getSearchContext().getLineageFlags()), + _graphService.getLineage(opContext, entityUrn, direction, offset, count, maxHops), _entityService); } @@ -81,13 +75,7 @@ public EntityLineageResult getLineage( } EntityLineageResult entityLineage = - _graphService.getLineage( - entityUrn, - direction, - offset, - count, - maxHops, - opContext.getSearchContext().getLineageFlags()); + _graphService.getLineage(opContext, entityUrn, direction, offset, count, maxHops); Siblings siblingAspectOfEntity = (Siblings) _entityService.getLatestAspect(opContext, entityUrn, SIBLINGS_ASPECT_NAME); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/SystemGraphRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/SystemGraphRetriever.java new file mode 100644 index 0000000000000..33cb1a7130f14 --- /dev/null +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/graph/SystemGraphRetriever.java @@ -0,0 +1,48 @@ +package com.linkedin.metadata.graph; + +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.RelationshipFilter; +import com.linkedin.metadata.query.filter.SortCriterion; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Setter; + +@Builder +public class SystemGraphRetriever implements GraphRetriever { + @Setter private OperationContext systemOperationContext; + @Nonnull private final GraphService graphService; + + @Nonnull + @Override + public RelatedEntitiesScrollResult scrollRelatedEntities( + @Nullable List sourceTypes, + @Nonnull Filter sourceEntityFilter, + @Nullable List destinationTypes, + @Nonnull Filter destinationEntityFilter, + @Nonnull List relationshipTypes, + @Nonnull RelationshipFilter relationshipFilter, + @Nonnull List sortCriteria, + @Nullable String scrollId, + int count, + @Nullable Long startTimeMillis, + @Nullable Long endTimeMillis) { + return graphService.scrollRelatedEntities( + systemOperationContext, + sourceTypes, + sourceEntityFilter, + destinationTypes, + destinationEntityFilter, + relationshipTypes, + relationshipFilter, + sortCriteria, + scrollId, + count, + startTimeMillis, + endTimeMillis); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java index 6703e07bfd915..352e89baefc25 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java @@ -19,6 +19,7 @@ import 
com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.query.filter.RelationshipFilter; import com.linkedin.metadata.query.filter.SortCriterion; +import io.datahubproject.metadata.context.OperationContext; import io.dgraph.DgraphClient; import io.dgraph.DgraphProto.Mutation; import io.dgraph.DgraphProto.NQuad; @@ -453,6 +454,7 @@ public void removeEdge(final Edge edge) { @Nonnull @Override public RelatedEntitiesResult findRelatedEntities( + @Nonnull final OperationContext opContext, @Nullable List sourceTypes, @Nonnull Filter sourceEntityFilter, @Nullable List destinationTypes, @@ -662,7 +664,7 @@ protected static List getRelatedEntitiesFromResponseData( } @Override - public void removeNode(@Nonnull Urn urn) { + public void removeNode(@Nonnull final OperationContext opContext, @Nonnull Urn urn) { String query = String.format("query {\n" + " node as var(func: eq(urn, \"%s\"))\n" + "}", urn); String deletion = "uid(node) * * ."; @@ -679,6 +681,7 @@ public void removeNode(@Nonnull Urn urn) { @Override public void removeEdgesFromNode( + @Nonnull final OperationContext opContext, @Nonnull Urn urn, @Nonnull List relationshipTypes, @Nonnull RelationshipFilter relationshipFilter) { @@ -782,6 +785,7 @@ public void clear() { @Nonnull @Override public RelatedEntitiesScrollResult scrollRelatedEntities( + @Nonnull OperationContext opContext, @Nullable List sourceTypes, @Nonnull Filter sourceEntityFilter, @Nullable List destinationTypes, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java index 50e5aa6ba893d..40fa79a0ef171 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java @@ -14,6 +14,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import 
com.linkedin.data.template.IntegerArray; +import com.linkedin.metadata.aspect.models.graph.EdgeUrnType; import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.metadata.graph.GraphFilters; import com.linkedin.metadata.graph.LineageDirection; @@ -34,14 +35,17 @@ import com.linkedin.metadata.utils.DataPlatformInstanceUtils; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.metrics.MetricUtils; +import io.datahubproject.metadata.context.OperationContext; import io.opentelemetry.extension.annotations.WithSpan; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; @@ -105,8 +109,7 @@ public class ESGraphQueryDAO { static final String GROUP_BY_DESTINATION_AGG = "group_by_destination"; static final String TOP_DOCUMENTS_AGG = "top_documents"; - @Nonnull - public static void addFilterToQueryBuilder( + private static void addFilterToQueryBuilder( @Nonnull Filter filter, @Nullable String node, BoolQueryBuilder rootQuery) { BoolQueryBuilder orQuery = new BoolQueryBuilder(); for (ConjunctiveCriterion conjunction : filter.getOr()) { @@ -231,7 +234,7 @@ private SearchResponse executeGroupByLineageSearchQuery( } } - private BoolQueryBuilder getAggregationFilter( + private static BoolQueryBuilder getAggregationFilter( Pair pair, RelationshipDirection direction) { BoolQueryBuilder subFilter = QueryBuilders.boolQuery(); TermQueryBuilder relationshipTypeTerm = @@ -258,6 +261,7 @@ private BoolQueryBuilder getAggregationFilter( } public SearchResponse getSearchResponse( + @Nonnull final OperationContext opContext, @Nullable final List sourceTypes, @Nonnull final Filter sourceEntityFilter, @Nullable final 
List destinationTypes, @@ -268,6 +272,8 @@ public SearchResponse getSearchResponse( final int count) { BoolQueryBuilder finalQuery = buildQuery( + opContext, + graphQueryConfiguration, sourceTypes, sourceEntityFilter, destinationTypes, @@ -279,6 +285,8 @@ public SearchResponse getSearchResponse( } public static BoolQueryBuilder buildQuery( + @Nonnull final OperationContext opContext, + @Nonnull final GraphQueryConfiguration graphQueryConfiguration, @Nullable final List sourceTypes, @Nullable final Filter sourceEntityFilter, @Nullable final List destinationTypes, @@ -286,6 +294,8 @@ public static BoolQueryBuilder buildQuery( @Nonnull final List relationshipTypes, @Nonnull final RelationshipFilter relationshipFilter) { return buildQuery( + opContext, + graphQueryConfiguration, sourceTypes, sourceEntityFilter, destinationTypes, @@ -296,6 +306,8 @@ public static BoolQueryBuilder buildQuery( } public static BoolQueryBuilder buildQuery( + @Nonnull final OperationContext opContext, + @Nonnull final GraphQueryConfiguration graphQueryConfiguration, @Nullable final List sourceTypes, @Nonnull final Filter sourceEntityFilter, @Nullable final List destinationTypes, @@ -345,19 +357,23 @@ public static BoolQueryBuilder buildQuery( if (lifecycleOwner != null) { finalQuery.filter(QueryBuilders.termQuery(EDGE_FIELD_LIFECYCLE_OWNER, lifecycleOwner)); } + if (!Optional.ofNullable(opContext.getSearchContext().getSearchFlags().isIncludeSoftDeleted()) + .orElse(false)) { + applyExcludeSoftDelete(graphQueryConfiguration, finalQuery); + } return finalQuery; } @WithSpan public LineageResponse getLineage( + @Nonnull final OperationContext opContext, @Nonnull Urn entityUrn, @Nonnull LineageDirection direction, GraphFilters graphFilters, int offset, int count, - int maxHops, - @Nullable LineageFlags lineageFlags) { + int maxHops) { Map result = new HashMap<>(); long currentTime = System.currentTimeMillis(); long remainingTime = graphQueryConfiguration.getTimeoutSeconds() * 1000; @@ -388,6 
+404,7 @@ public LineageResponse getLineage( // Do one hop on the lineage graph Stream intermediateStream = processOneHopLineage( + opContext, currentLevel, remainingTime, direction, @@ -398,7 +415,6 @@ public LineageResponse getLineage( existingPaths, exploreMultiplePaths, result, - lineageFlags, i); currentLevel = intermediateStream.collect(Collectors.toList()); currentTime = System.currentTimeMillis(); @@ -421,6 +437,7 @@ public LineageResponse getLineage( } private Stream processOneHopLineage( + @Nonnull OperationContext opContext, List currentLevel, Long remainingTime, LineageDirection direction, @@ -431,7 +448,6 @@ private Stream processOneHopLineage( Map existingPaths, boolean exploreMultiplePaths, Map result, - LineageFlags lineageFlags, int i) { // Do one hop on the lineage graph @@ -439,6 +455,7 @@ private Stream processOneHopLineage( int remainingHops = maxHops - numHops; List oneHopRelationships = getLineageRelationshipsInBatches( + opContext, currentLevel, direction, graphFilters, @@ -448,8 +465,10 @@ private Stream processOneHopLineage( remainingHops, remainingTime, existingPaths, - exploreMultiplePaths, - lineageFlags); + exploreMultiplePaths); + + final LineageFlags lineageFlags = opContext.getSearchContext().getLineageFlags(); + for (LineageRelationship oneHopRelnship : oneHopRelationships) { if (result.containsKey(oneHopRelnship.getEntity())) { log.debug("Urn encountered again during graph walk {}", oneHopRelnship.getEntity()); @@ -487,6 +506,7 @@ private Stream processOneHopLineage( if (!additionalCurrentLevel.isEmpty()) { Stream ignoreAsHopUrns = processOneHopLineage( + opContext, additionalCurrentLevel, remainingTime, direction, @@ -497,7 +517,6 @@ private Stream processOneHopLineage( existingPaths, exploreMultiplePaths, result, - lineageFlags, i); intermediateStream = Stream.concat(intermediateStream, ignoreAsHopUrns); } @@ -560,6 +579,7 @@ private LineageRelationship mergeLineageRelationships( // Get 1-hop lineage relationships asynchronously 
in batches with timeout @WithSpan public List getLineageRelationshipsInBatches( + @Nonnull final OperationContext opContext, @Nonnull List entityUrns, @Nonnull LineageDirection direction, GraphFilters graphFilters, @@ -569,8 +589,7 @@ public List getLineageRelationshipsInBatches( int remainingHops, long remainingTime, Map existingPaths, - boolean exploreMultiplePaths, - @Nullable LineageFlags lineageFlags) { + boolean exploreMultiplePaths) { List> batches = Lists.partition(entityUrns, graphQueryConfiguration.getBatchSize()); return ConcurrencyUtils.getAllCompleted( batches.stream() @@ -579,6 +598,7 @@ public List getLineageRelationshipsInBatches( CompletableFuture.supplyAsync( () -> getLineageRelationships( + opContext, batchUrns, direction, graphFilters, @@ -587,8 +607,7 @@ public List getLineageRelationshipsInBatches( numHops, remainingHops, existingPaths, - exploreMultiplePaths, - lineageFlags))) + exploreMultiplePaths))) .collect(Collectors.toList()), remainingTime, TimeUnit.MILLISECONDS) @@ -600,6 +619,7 @@ public List getLineageRelationshipsInBatches( // Get 1-hop lineage relationships @WithSpan private List getLineageRelationships( + @Nonnull final OperationContext opContext, @Nonnull List entityUrns, @Nonnull LineageDirection direction, GraphFilters graphFilters, @@ -608,8 +628,8 @@ private List getLineageRelationships( int numHops, int remainingHops, Map existingPaths, - boolean exploreMultiplePaths, - @Nullable LineageFlags lineageFlags) { + boolean exploreMultiplePaths) { + final LineageFlags lineageFlags = opContext.getSearchContext().getLineageFlags(); Map> urnsPerEntityType = entityUrns.stream().collect(Collectors.groupingBy(Urn::getEntityType)); Map> edgesPerEntityType = @@ -628,7 +648,7 @@ private List getLineageRelationships( .collect(Collectors.toSet()); QueryBuilder finalQuery = - getLineageQuery(urnsPerEntityType, edgesPerEntityType, graphFilters, lineageFlags); + getLineageQuery(opContext, urnsPerEntityType, edgesPerEntityType, graphFilters); 
SearchResponse response; if (lineageFlags != null && lineageFlags.getEntitiesExploredPerHopLimit() != null) { response = @@ -660,11 +680,12 @@ private List getLineageRelationships( } @VisibleForTesting - public QueryBuilder getLineageQuery( + public static QueryBuilder getLineageQuery( + @Nonnull OperationContext opContext, @Nonnull Map> urnsPerEntityType, @Nonnull Map> edgesPerEntityType, - @Nonnull GraphFilters graphFilters, - @Nullable LineageFlags lineageFlags) { + @Nonnull GraphFilters graphFilters) { + final LineageFlags lineageFlags = opContext.getSearchContext().getLineageFlags(); BoolQueryBuilder entityTypeQueries = QueryBuilders.boolQuery(); // Get all relation types relevant to the set of urns to hop from urnsPerEntityType.forEach( @@ -690,7 +711,7 @@ public QueryBuilder getLineageQuery( && lineageFlags.getStartTimeMillis() != null && lineageFlags.getEndTimeMillis() != null) { finalQuery.filter( - TimeFilterUtils.getEdgeTimeFilterQuery( + GraphFilterUtils.getEdgeTimeFilterQuery( lineageFlags.getStartTimeMillis(), lineageFlags.getEndTimeMillis())); } else { log.debug("Empty time filter range provided. Skipping application of time filters"); @@ -700,7 +721,7 @@ public QueryBuilder getLineageQuery( } @VisibleForTesting - public QueryBuilder getLineageQueryForEntityType( + static QueryBuilder getLineageQueryForEntityType( @Nonnull List urns, @Nonnull List lineageEdges, @Nonnull GraphFilters graphFilters) { @@ -769,7 +790,7 @@ private void addViaNodeBoostQuery(final SearchSourceBuilder sourceBuilder) { * the Graph Store. 
*/ @VisibleForTesting - public static void addEdgeToPaths( + static void addEdgeToPaths( @Nonnull final Map existingPaths, @Nonnull final Urn parentUrn, @Nonnull final Urn childUrn) { @@ -782,7 +803,7 @@ private static boolean containsCycle(final UrnArray path) { return (path.size() != urnSet.size()); } - public static boolean addEdgeToPaths( + static boolean addEdgeToPaths( @Nonnull final Map existingPaths, @Nonnull final Urn parentUrn, final Urn viaUrn, @@ -1317,6 +1338,7 @@ public static class LineageResponse { } public SearchResponse getSearchResponse( + @Nonnull final OperationContext opContext, @Nullable final List sourceTypes, @Nullable final Filter sourceEntityFilter, @Nullable final List destinationTypes, @@ -1329,6 +1351,8 @@ public SearchResponse getSearchResponse( BoolQueryBuilder finalQuery = buildQuery( + opContext, + graphQueryConfiguration, sourceTypes, sourceEntityFilter, destinationTypes, @@ -1371,4 +1395,17 @@ private SearchResponse executeScrollSearchQuery( throw new ESQueryException("Search query failed:", e); } } + + private static void applyExcludeSoftDelete( + GraphQueryConfiguration graphQueryConfiguration, BoolQueryBuilder boolQueryBuilder) { + if (graphQueryConfiguration.isGraphStatusEnabled()) { + Arrays.stream(EdgeUrnType.values()) + .map( + edgeUrnType -> + QueryBuilders.termsQuery( + GraphFilterUtils.getUrnStatusFieldName(edgeUrnType), "true")) + .filter(statusQuery -> !boolQueryBuilder.mustNot().contains(statusQuery)) + .forEach(boolQueryBuilder::mustNot); + } + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java index ddbd00f90ef68..ba481bdfa109f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java @@ -4,10 +4,12 @@ import static 
com.linkedin.metadata.graph.elastic.ElasticSearchGraphService.INDEX_NAME; import com.google.common.collect.ImmutableList; +import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.RelationshipFilter; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; +import io.datahubproject.metadata.context.OperationContext; import java.util.List; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -17,7 +19,9 @@ import org.opensearch.action.update.UpdateRequest; import org.opensearch.common.xcontent.XContentType; import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.reindex.BulkByScrollResponse; +import org.opensearch.script.Script; @Slf4j @RequiredArgsConstructor @@ -25,8 +29,7 @@ public class ESGraphWriteDAO { private final IndexConvention indexConvention; private final ESBulkProcessor bulkProcessor; private final int numRetries; - - private static final String ES_WRITES_METRIC = "num_elasticSearch_writes"; + private final GraphQueryConfiguration graphQueryConfiguration; /** * Updates or inserts the given search document. @@ -56,6 +59,7 @@ public void deleteDocument(@Nonnull String docId) { } public BulkByScrollResponse deleteByQuery( + @Nonnull final OperationContext opContext, @Nullable final String sourceType, @Nonnull final Filter sourceEntityFilter, @Nullable final String destinationType, @@ -64,6 +68,8 @@ public BulkByScrollResponse deleteByQuery( @Nonnull final RelationshipFilter relationshipFilter) { BoolQueryBuilder finalQuery = buildQuery( + opContext, + graphQueryConfiguration, sourceType == null ? ImmutableList.of() : ImmutableList.of(sourceType), sourceEntityFilter, destinationType == null ? 
ImmutableList.of() : ImmutableList.of(destinationType), @@ -77,6 +83,7 @@ public BulkByScrollResponse deleteByQuery( } public BulkByScrollResponse deleteByQuery( + @Nonnull final OperationContext opContext, @Nullable final String sourceType, @Nonnull final Filter sourceEntityFilter, @Nullable final String destinationType, @@ -86,6 +93,8 @@ public BulkByScrollResponse deleteByQuery( String lifecycleOwner) { BoolQueryBuilder finalQuery = buildQuery( + opContext, + graphQueryConfiguration, sourceType == null ? ImmutableList.of() : ImmutableList.of(sourceType), sourceEntityFilter, destinationType == null ? ImmutableList.of() : ImmutableList.of(destinationType), @@ -98,4 +107,12 @@ public BulkByScrollResponse deleteByQuery( .deleteByQuery(finalQuery, indexConvention.getIndexName(INDEX_NAME)) .orElse(null); } + + @Nullable + public BulkByScrollResponse updateByQuery( + @Nonnull Script script, @Nonnull final QueryBuilder query) { + return bulkProcessor + .updateByQuery(script, query, indexConvention.getIndexName(INDEX_NAME)) + .orElse(null); + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java index e1532ea4e26c0..1769c53e4cd9b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java @@ -1,12 +1,16 @@ package com.linkedin.metadata.graph.elastic; import static com.linkedin.metadata.aspect.models.graph.Edge.*; +import static com.linkedin.metadata.graph.elastic.GraphFilterUtils.getUrnStatusFieldName; +import static com.linkedin.metadata.graph.elastic.GraphFilterUtils.getUrnStatusQuery; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableList; import 
com.linkedin.common.urn.Urn; +import com.linkedin.data.template.StringArray; import com.linkedin.metadata.aspect.models.graph.Edge; +import com.linkedin.metadata.aspect.models.graph.EdgeUrnType; import com.linkedin.metadata.aspect.models.graph.RelatedEntities; import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; import com.linkedin.metadata.aspect.models.graph.RelatedEntity; @@ -17,7 +21,6 @@ import com.linkedin.metadata.graph.LineageRelationshipArray; import com.linkedin.metadata.graph.RelatedEntitiesResult; import com.linkedin.metadata.models.registry.LineageRegistry; -import com.linkedin.metadata.query.LineageFlags; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -35,6 +38,7 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.structured.StructuredPropertyDefinition; import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.OperationContext; import io.opentelemetry.extension.annotations.WithSpan; import java.io.IOException; import java.util.ArrayList; @@ -51,13 +55,15 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.opensearch.action.search.SearchResponse; +import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; +import org.opensearch.script.Script; +import org.opensearch.script.ScriptType; import org.opensearch.search.SearchHit; @Slf4j @RequiredArgsConstructor public class ElasticSearchGraphService implements GraphService, ElasticSearchIndexed { - private final LineageRegistry _lineageRegistry; private final ESBulkProcessor _esBulkProcessor; private final IndexConvention _indexConvention; @@ -68,7 +74,7 @@ public class ElasticSearchGraphService implements GraphService, ElasticSearchInd public static final String INDEX_NAME = "graph_service_v1"; private static final Map 
EMPTY_HASH = new HashMap<>(); - private String toDocument(@Nonnull final Edge edge) { + private static String toDocument(@Nonnull final Edge edge) { final ObjectNode searchDocument = JsonNodeFactory.instance.objectNode(); final ObjectNode sourceObject = JsonNodeFactory.instance.objectNode(); @@ -114,6 +120,18 @@ private String toDocument(@Nonnull final Edge edge) { if (edge.getVia() != null) { searchDocument.put(EDGE_FIELD_VIA, edge.getVia().toString()); } + if (edge.getViaStatus() != null) { + searchDocument.put(EDGE_FIELD_VIA_STATUS, edge.getViaStatus()); + } + if (edge.getLifecycleOwnerStatus() != null) { + searchDocument.put(EDGE_FIELD_LIFECYCLE_OWNER_STATUS, edge.getLifecycleOwnerStatus()); + } + if (edge.getSourceStatus() != null) { + searchDocument.put(EDGE_SOURCE_STATUS, edge.getSourceStatus()); + } + if (edge.getDestinationStatus() != null) { + searchDocument.put(EDGE_DESTINATION_STATUS, edge.getDestinationStatus()); + } log.debug("Search doc for write {}", searchDocument); return searchDocument.toString(); @@ -142,8 +160,10 @@ public void removeEdge(@Nonnull final Edge edge) { _graphWriteDAO.deleteDocument(docId); } + @Override @Nonnull public RelatedEntitiesResult findRelatedEntities( + @Nonnull final OperationContext opContext, @Nullable final List sourceTypes, @Nonnull final Filter sourceEntityFilter, @Nullable final List destinationTypes, @@ -161,6 +181,7 @@ public RelatedEntitiesResult findRelatedEntities( SearchResponse response = _graphReadDAO.getSearchResponse( + opContext, sourceTypes, sourceEntityFilter, destinationTypes, @@ -188,35 +209,16 @@ public RelatedEntitiesResult findRelatedEntities( @Override @Deprecated public EntityLineageResult getLineage( + @Nonnull final OperationContext opContext, @Nonnull Urn entityUrn, @Nonnull LineageDirection direction, GraphFilters graphFilters, int offset, int count, int maxHops) { - ESGraphQueryDAO.LineageResponse lineageResponse = - _graphReadDAO.getLineage(entityUrn, direction, graphFilters, offset, 
count, maxHops, null); - return new EntityLineageResult() - .setRelationships(new LineageRelationshipArray(lineageResponse.getLineageRelationships())) - .setStart(offset) - .setCount(count) - .setTotal(lineageResponse.getTotal()); - } - - @Nonnull - @WithSpan - @Override - public EntityLineageResult getLineage( - @Nonnull Urn entityUrn, - @Nonnull LineageDirection direction, - GraphFilters graphFilters, - int offset, - int count, - int maxHops, - @Nullable LineageFlags lineageFlags) { ESGraphQueryDAO.LineageResponse lineageResponse = _graphReadDAO.getLineage( - entityUrn, direction, graphFilters, offset, count, maxHops, lineageFlags); + opContext, entityUrn, direction, graphFilters, offset, count, maxHops); return new EntityLineageResult() .setRelationships(new LineageRelationshipArray(lineageResponse.getLineageRelationships())) .setStart(offset) @@ -224,13 +226,14 @@ public EntityLineageResult getLineage( .setTotal(lineageResponse.getTotal()); } - private Filter createUrnFilter(@Nonnull final Urn urn) { + private static Filter createUrnFilter(@Nonnull final Urn urn) { Filter filter = new Filter(); CriterionArray criterionArray = new CriterionArray(); Criterion criterion = new Criterion(); criterion.setCondition(Condition.EQUAL); criterion.setField("urn"); criterion.setValue(urn.toString()); + criterion.setValues(new StringArray(urn.toString())); criterionArray.add(criterion); filter.setOr( new ConjunctiveCriterionArray( @@ -239,7 +242,7 @@ private Filter createUrnFilter(@Nonnull final Urn urn) { return filter; } - public void removeNode(@Nonnull final Urn urn) { + public void removeNode(@Nonnull final OperationContext opContext, @Nonnull final Urn urn) { Filter urnFilter = createUrnFilter(urn); Filter emptyFilter = new Filter().setOr(new ConjunctiveCriterionArray()); List relationshipTypes = new ArrayList<>(); @@ -250,19 +253,47 @@ public void removeNode(@Nonnull final Urn urn) { new RelationshipFilter().setDirection(RelationshipDirection.INCOMING); 
_graphWriteDAO.deleteByQuery( - null, urnFilter, null, emptyFilter, relationshipTypes, outgoingFilter); + opContext, null, urnFilter, null, emptyFilter, relationshipTypes, outgoingFilter); _graphWriteDAO.deleteByQuery( - null, urnFilter, null, emptyFilter, relationshipTypes, incomingFilter); + opContext, null, urnFilter, null, emptyFilter, relationshipTypes, incomingFilter); // Delete all edges where this entity is a lifecycle owner _graphWriteDAO.deleteByQuery( - null, emptyFilter, null, emptyFilter, relationshipTypes, incomingFilter, urn.toString()); + opContext, + null, + emptyFilter, + null, + emptyFilter, + relationshipTypes, + incomingFilter, + urn.toString()); + } - return; + @Override + public void setEdgeStatus( + @Nonnull Urn urn, boolean removed, @Nonnull EdgeUrnType... edgeUrnTypes) { + + for (EdgeUrnType edgeUrnType : edgeUrnTypes) { + // Update the graph status fields per urn type which do not match target state + QueryBuilder negativeQuery = getUrnStatusQuery(edgeUrnType, urn, !removed); + + // Set up the script to update the boolean field + String scriptContent = + "ctx._source." 
+ getUrnStatusFieldName(edgeUrnType) + " = params.newValue"; + Script script = + new Script( + ScriptType.INLINE, + "painless", + scriptContent, + Collections.singletonMap("newValue", removed)); + + _graphWriteDAO.updateByQuery(script, negativeQuery); + } } public void removeEdgesFromNode( + @Nonnull final OperationContext opContext, @Nonnull final Urn urn, @Nonnull final List relationshipTypes, @Nonnull final RelationshipFilter relationshipFilter) { @@ -271,7 +302,7 @@ public void removeEdgesFromNode( Filter emptyFilter = new Filter().setOr(new ConjunctiveCriterionArray()); _graphWriteDAO.deleteByQuery( - null, urnFilter, null, emptyFilter, relationshipTypes, relationshipFilter); + opContext, null, urnFilter, null, emptyFilter, relationshipTypes, relationshipFilter); } @Override @@ -308,8 +339,8 @@ public boolean supportsMultiHop() { } @Nonnull - @Override public RelatedEntitiesScrollResult scrollRelatedEntities( + @Nonnull final OperationContext opContext, @Nullable List sourceTypes, @Nullable Filter sourceEntityFilter, @Nullable List destinationTypes, @@ -326,6 +357,7 @@ public RelatedEntitiesScrollResult scrollRelatedEntities( SearchResponse response = _graphReadDAO.getSearchResponse( + opContext, sourceTypes, sourceEntityFilter, destinationTypes, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/TimeFilterUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphFilterUtils.java similarity index 67% rename from metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/TimeFilterUtils.java rename to metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphFilterUtils.java index 7ee84ce834cfa..982bcae9b5fd9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/TimeFilterUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphFilterUtils.java @@ -1,14 +1,81 @@ package com.linkedin.metadata.graph.elastic; +import static 
com.linkedin.metadata.aspect.models.graph.Edge.EDGE_DESTINATION_STATUS; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_DESTINATION_URN_FIELD; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_FIELD_LIFECYCLE_OWNER; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_FIELD_LIFECYCLE_OWNER_STATUS; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_FIELD_VIA; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_FIELD_VIA_STATUS; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_SOURCE_STATUS; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_SOURCE_URN_FIELD; import static com.linkedin.metadata.graph.elastic.ESGraphQueryDAO.*; +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.aspect.models.graph.EdgeUrnType; +import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; @Slf4j -public class TimeFilterUtils { +public class GraphFilterUtils { + + public static QueryBuilder getUrnStatusQuery( + @Nonnull EdgeUrnType edgeUrnType, @Nonnull final Urn urn, @Nonnull Boolean removed) { + + final String urnField = getUrnFieldName(edgeUrnType); + final String statusField = getUrnStatusFieldName(edgeUrnType); + + // Create a BoolQueryBuilder + BoolQueryBuilder finalQuery = QueryBuilders.boolQuery(); + + // urn filter + finalQuery.filter(QueryBuilders.termQuery(urnField, urn.toString())); + + // status filter + if (removed) { + finalQuery.filter(QueryBuilders.termQuery(statusField, removed.toString())); + } else { + finalQuery.minimumShouldMatch(1); + finalQuery.should(QueryBuilders.termQuery(statusField, removed.toString())); + finalQuery.should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(statusField))); + } + + return finalQuery; + } + + public static String 
getUrnStatusFieldName(EdgeUrnType edgeUrnType) { + switch (edgeUrnType) { + case SOURCE: + return EDGE_SOURCE_STATUS; + case DESTINATION: + return EDGE_DESTINATION_STATUS; + case VIA: + return EDGE_FIELD_VIA_STATUS; + case LIFECYCLE_OWNER: + return EDGE_FIELD_LIFECYCLE_OWNER_STATUS; + default: + throw new IllegalStateException( + String.format("Unhandled EdgeUrnType. Found: %s", edgeUrnType)); + } + } + + public static String getUrnFieldName(EdgeUrnType edgeUrnType) { + switch (edgeUrnType) { + case SOURCE: + return EDGE_SOURCE_URN_FIELD; + case DESTINATION: + return EDGE_DESTINATION_URN_FIELD; + case VIA: + return EDGE_FIELD_VIA; + case LIFECYCLE_OWNER: + return EDGE_FIELD_LIFECYCLE_OWNER; + default: + throw new IllegalStateException( + String.format("Unhandled EdgeUrnType. Found: %s", edgeUrnType)); + } + } /** * In order to filter for edges that fall into a specific filter window, we perform a @@ -141,5 +208,5 @@ private static QueryBuilder buildManualLineageFilter() { return QueryBuilders.termQuery(String.format("%s.%s", PROPERTIES, SOURCE), UI); } - private TimeFilterUtils() {} + private GraphFilterUtils() {} } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphRelationshipMappingsBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphRelationshipMappingsBuilder.java index 7a6c7701fde5f..164bf3ad17d8c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphRelationshipMappingsBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphRelationshipMappingsBuilder.java @@ -20,6 +20,8 @@ public static Map getMappings() { mappings.put(EDGE_FIELD_PROPERTIES, getMappingsForEdgeProperties()); mappings.put(EDGE_FIELD_LIFECYCLE_OWNER, getMappingsForKeyword()); mappings.put(EDGE_FIELD_VIA, getMappingsForKeyword()); + mappings.put(EDGE_FIELD_LIFECYCLE_OWNER_STATUS, getMappingsForBoolean()); + mappings.put(EDGE_FIELD_VIA_STATUS, getMappingsForBoolean()); return 
ImmutableMap.of("properties", mappings); } @@ -27,12 +29,17 @@ private static Map getMappingsForKeyword() { return ImmutableMap.builder().put("type", "keyword").build(); } + private static Map getMappingsForBoolean() { + return ImmutableMap.builder().put("type", "boolean").build(); + } + private static Map getMappingsForEntity() { Map mappings = ImmutableMap.builder() .put("urn", getMappingsForKeyword()) .put("entityType", getMappingsForKeyword()) + .put("removed", getMappingsForBoolean()) .build(); return ImmutableMap.of("properties", mappings); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java index 9fe9c242fe48c..75d993f52680a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java @@ -32,7 +32,7 @@ import com.linkedin.metadata.search.elasticsearch.query.request.SearchAfterWrapper; import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.util.Pair; -import io.opentelemetry.extension.annotations.WithSpan; +import io.datahubproject.metadata.context.OperationContext; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collections; @@ -250,33 +250,24 @@ public void removeEdge(final Edge edge) { } @Nonnull - @WithSpan @Override public EntityLineageResult getLineage( + @Nonnull final OperationContext opContext, @Nonnull Urn entityUrn, @Nonnull LineageDirection direction, GraphFilters graphFilters, int offset, int count, int maxHops) { - return getLineage(entityUrn, direction, graphFilters, offset, count, maxHops, null); - } - - @Nonnull - @Override - public EntityLineageResult getLineage( - @Nonnull Urn entityUrn, - @Nonnull LineageDirection direction, - GraphFilters graphFilters, - int offset, - int count, - int maxHops, - @Nullable LineageFlags lineageFlags) { 
log.debug(String.format("Neo4j getLineage maxHops = %d", maxHops)); final var statementAndParams = generateLineageStatementAndParameters( - entityUrn, direction, graphFilters, maxHops, lineageFlags); + entityUrn, + direction, + graphFilters, + maxHops, + opContext.getSearchContext().getLineageFlags()); final var statement = statementAndParams.getFirst(); final var parameters = statementAndParams.getSecond(); @@ -457,6 +448,7 @@ private Pair> generateLineageStatementAndParameters( @Nonnull public RelatedEntitiesResult findRelatedEntities( + @Nonnull final OperationContext opContext, @Nullable final List sourceTypes, @Nonnull final Filter sourceEntityFilter, @Nullable final List destinationTypes, @@ -600,7 +592,7 @@ private String computeEntityTypeWhereClause( return whereClause; } - public void removeNode(@Nonnull final Urn urn) { + public void removeNode(@Nonnull final OperationContext opContext, @Nonnull final Urn urn) { log.debug(String.format("Removing Neo4j node with urn: %s", urn)); final String srcNodeLabel = urn.getEntityType(); @@ -627,6 +619,7 @@ public void removeNode(@Nonnull final Urn urn) { * @param relationshipFilter Query relationship filter */ public void removeEdgesFromNode( + @Nonnull final OperationContext opContext, @Nonnull final Urn urn, @Nonnull final List relationshipTypes, @Nonnull final RelationshipFilter relationshipFilter) { @@ -915,6 +908,7 @@ private boolean isSourceDestReversed( @Nonnull @Override public RelatedEntitiesScrollResult scrollRelatedEntities( + @Nonnull OperationContext opContext, @Nullable List sourceTypes, @Nonnull Filter sourceEntityFilter, @Nullable List destinationTypes, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java index 435731a3f9d04..ec9c44e42f7f4 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java @@ -173,13 +173,7 @@ public LineageSearchResult searchAcrossLineage( if (cachedLineageResult == null || finalOpContext.getSearchContext().getSearchFlags().isSkipCache()) { lineageResult = - _graphService.getLineage( - sourceUrn, - direction, - 0, - MAX_RELATIONSHIPS, - maxHops, - opContext.getSearchContext().getLineageFlags()); + _graphService.getLineage(opContext, sourceUrn, direction, 0, MAX_RELATIONSHIPS, maxHops); if (cacheEnabled) { try { cache.put( @@ -210,12 +204,7 @@ public LineageSearchResult searchAcrossLineage( // we have to refetch EntityLineageResult result = _graphService.getLineage( - sourceUrn, - direction, - 0, - MAX_RELATIONSHIPS, - finalMaxHops, - opContext.getSearchContext().getLineageFlags()); + opContext, sourceUrn, direction, 0, MAX_RELATIONSHIPS, finalMaxHops); cache.put(cacheKey, result); log.debug("Refilled Cached lineage entry for: {}.", sourceUrn); } else { @@ -770,13 +759,7 @@ public LineageScrollResult scrollAcrossLineage( if (cachedLineageResult == null) { maxHops = maxHops != null ? 
maxHops : 1000; lineageResult = - _graphService.getLineage( - sourceUrn, - direction, - 0, - MAX_RELATIONSHIPS, - maxHops, - opContext.getSearchContext().getLineageFlags()); + _graphService.getLineage(opContext, sourceUrn, direction, 0, MAX_RELATIONSHIPS, maxHops); if (cacheEnabled) { cache.put( cacheKey, new CachedEntityLineageResult(lineageResult, System.currentTimeMillis())); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java index 2d04e99774050..6de79b6c4b181 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java @@ -87,6 +87,8 @@ public class ESIndexBuilder { @Getter private final GitVersion gitVersion; + @Getter private final int maxReindexHours; + private static final RequestOptions REQUEST_OPTIONS = RequestOptions.DEFAULT.toBuilder() .setRequestConfig(RequestConfig.custom().setSocketTimeout(180 * 1000).build()) @@ -106,6 +108,34 @@ public ESIndexBuilder( boolean enableStructuredPropertiesReindex, ElasticSearchConfiguration elasticSearchConfiguration, GitVersion gitVersion) { + this( + searchClient, + numShards, + numReplicas, + numRetries, + refreshIntervalSeconds, + indexSettingOverrides, + enableIndexSettingsReindex, + enableIndexMappingsReindex, + enableStructuredPropertiesReindex, + elasticSearchConfiguration, + gitVersion, + 0); + } + + public ESIndexBuilder( + RestHighLevelClient searchClient, + int numShards, + int numReplicas, + int numRetries, + int refreshIntervalSeconds, + Map> indexSettingOverrides, + boolean enableIndexSettingsReindex, + boolean enableIndexMappingsReindex, + boolean enableStructuredPropertiesReindex, + ElasticSearchConfiguration elasticSearchConfiguration, + GitVersion gitVersion, + int 
maxReindexHours) { this._searchClient = searchClient; this.numShards = numShards; this.numReplicas = numReplicas; @@ -117,6 +147,7 @@ public ESIndexBuilder( this.elasticSearchConfiguration = elasticSearchConfiguration; this.enableStructuredPropertiesReindex = enableStructuredPropertiesReindex; this.gitVersion = gitVersion; + this.maxReindexHours = maxReindexHours; RetryConfig config = RetryConfig.custom() @@ -348,10 +379,10 @@ private static String getNextIndexName(String base, long startTime) { private void reindex(ReindexConfig indexState) throws Throwable { final long startTime = System.currentTimeMillis(); - final int maxReindexHours = 8; final long initialCheckIntervalMilli = 1000; final long finalCheckIntervalMilli = 60000; - final long timeoutAt = startTime + (1000 * 60 * 60 * maxReindexHours); + final long timeoutAt = + maxReindexHours > 0 ? startTime + (1000L * 60 * 60 * maxReindexHours) : Long.MAX_VALUE; String tempIndexName = getNextIndexName(indexState.name(), startTime); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java index fc29aca411784..63a9c731a2d39 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java @@ -23,6 +23,8 @@ import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.reindex.BulkByScrollResponse; import org.opensearch.index.reindex.DeleteByQueryRequest; +import org.opensearch.index.reindex.UpdateByQueryRequest; +import org.opensearch.script.Script; @Slf4j @Builder(builderMethodName = "hiddenBuilder") @@ -30,6 +32,7 @@ public class ESBulkProcessor implements Closeable { private static final String ES_WRITES_METRIC = "num_elasticSearch_writes"; private static final String ES_BATCHES_METRIC = 
"num_elasticSearch_batches_submitted"; private static final String ES_DELETE_EXCEPTION_METRIC = "delete_by_query"; + private static final String ES_UPDATE_EXCEPTION_METRIC = "update_by_query"; private static final String ES_SUBMIT_DELETE_EXCEPTION_METRIC = "submit_delete_by_query_task"; private static final String ES_SUBMIT_REINDEX_METRIC = "reindex_submit"; private static final String ES_REINDEX_SUCCESS_METRIC = "reindex_success"; @@ -97,6 +100,26 @@ public Optional deleteByQuery( return deleteByQuery(queryBuilder, refresh, bulkRequestsLimit, defaultTimeout, indices); } + public Optional updateByQuery( + Script script, QueryBuilder queryBuilder, String... indices) { + // Create an UpdateByQueryRequest + UpdateByQueryRequest updateByQuery = new UpdateByQueryRequest(indices); + updateByQuery.setQuery(queryBuilder); + updateByQuery.setScript(script); + + try { + final BulkByScrollResponse updateResponse = + searchClient.updateByQuery(updateByQuery, RequestOptions.DEFAULT); + MetricUtils.counter(this.getClass(), ES_WRITES_METRIC).inc(updateResponse.getTotal()); + return Optional.of(updateResponse); + } catch (Exception e) { + log.error("ERROR: Failed to update by query. See stacktrace for a more detailed error:", e); + MetricUtils.exceptionCounter(ESBulkProcessor.class, ES_UPDATE_EXCEPTION_METRIC, e); + } + + return Optional.empty(); + } + public Optional deleteByQuery( QueryBuilder queryBuilder, boolean refresh, int limit, TimeValue timeout, String... 
indices) { DeleteByQueryRequest deleteByQueryRequest = diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java index 3d31b552db0bb..b6d9357ecd65e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java @@ -389,13 +389,9 @@ private Optional getNodeForValue( // By default run toString default: String value = fieldValue.toString(); - // If index type is BROWSE_PATH, make sure the value starts with a slash - if (fieldType == FieldType.BROWSE_PATH && !value.startsWith("/")) { - value = "/" + value; - } return value.isEmpty() ? Optional.empty() - : Optional.of(JsonNodeFactory.instance.textNode(fieldValue.toString())); + : Optional.of(JsonNodeFactory.instance.textNode(value)); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java new file mode 100644 index 0000000000000..7549aea2007da --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java @@ -0,0 +1,452 @@ +package com.linkedin.metadata.service; + +import static com.linkedin.metadata.Constants.FORCE_INDEXING_KEY; +import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; +import static com.linkedin.metadata.search.utils.QueryUtils.createRelationshipFilter; +import static com.linkedin.metadata.search.utils.QueryUtils.newRelationshipFilter; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.InputField; +import com.linkedin.common.InputFields; +import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; +import 
com.linkedin.data.template.RecordTemplate; +import com.linkedin.datajob.DataJobInputOutput; +import com.linkedin.dataset.FineGrainedLineage; +import com.linkedin.dataset.FineGrainedLineageArray; +import com.linkedin.dataset.UpstreamLineage; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.batch.MCLItem; +import com.linkedin.metadata.aspect.models.graph.Edge; +import com.linkedin.metadata.aspect.models.graph.EdgeUrnType; +import com.linkedin.metadata.entity.SearchIndicesService; +import com.linkedin.metadata.entity.ebean.batch.MCLItemImpl; +import com.linkedin.metadata.graph.GraphIndexUtils; +import com.linkedin.metadata.graph.GraphService; +import com.linkedin.metadata.graph.dgraph.DgraphGraphService; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.RelationshipFieldSpec; +import com.linkedin.metadata.models.extractor.FieldExtractor; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import com.linkedin.metadata.utils.SchemaFieldUtils; +import com.linkedin.mxe.MetadataChangeLog; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.OperationContext; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Getter; +import lombok.Setter; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class UpdateGraphIndicesService implements SearchIndicesService { + private static final String DOWNSTREAM_OF = "DownstreamOf"; + + public static 
UpdateGraphIndicesService withService(GraphService graphService) { + return new UpdateGraphIndicesService(graphService); + } + + private final GraphService graphService; + + @Getter private final boolean graphStatusEnabled; + + @Getter @Setter @VisibleForTesting private boolean graphDiffMode; + + private static final Set UPDATE_CHANGE_TYPES = + ImmutableSet.of( + ChangeType.CREATE, + ChangeType.CREATE_ENTITY, + ChangeType.UPSERT, + ChangeType.RESTATE, + ChangeType.PATCH); + + public UpdateGraphIndicesService(GraphService graphService) { + this(graphService, true, true); + } + + public UpdateGraphIndicesService( + GraphService graphService, boolean graphDiffMode, boolean graphStatusEnabled) { + this.graphService = graphService; + this.graphDiffMode = graphDiffMode; + this.graphStatusEnabled = graphStatusEnabled; + } + + @Override + public void handleChangeEvent( + @Nonnull OperationContext opContext, @Nonnull final MetadataChangeLog event) { + try { + MCLItemImpl mclItem = + MCLItemImpl.builder().build(event, opContext.getAspectRetrieverOpt().get()); + + if (UPDATE_CHANGE_TYPES.contains(event.getChangeType())) { + handleUpdateChangeEvent(opContext, mclItem); + + if (graphStatusEnabled && mclItem.getAspectName().equals(STATUS_ASPECT_NAME)) { + handleStatusUpdateChangeEvent(opContext, mclItem); + } + } else if (event.getChangeType() == ChangeType.DELETE) { + handleDeleteChangeEvent(opContext, mclItem); + + if (graphStatusEnabled && mclItem.getAspectName().equals(STATUS_ASPECT_NAME)) { + handleStatusUpdateChangeEvent(opContext, mclItem); + } + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private void handleStatusUpdateChangeEvent( + @Nonnull final OperationContext opContext, @Nonnull final MCLItem item) { + final Boolean removed; + if (ChangeType.DELETE.equals(item.getChangeType())) { + removed = false; + } else if (ChangeType.RESTATE.equals(item.getChangeType()) + || item.getPreviousRecordTemplate() == null + || 
!item.getPreviousAspect(Status.class).equals(item.getAspect(Status.class))) { + removed = item.getAspect(Status.class).isRemoved(); + } else { + removed = null; + } + + if (removed != null) { + graphService.setEdgeStatus(item.getUrn(), removed, EdgeUrnType.values()); + } + } + + /** + * This very important method processes {@link MetadataChangeLog} events that represent changes to + * the Metadata Graph. + * + *

In particular, it handles updating the Search, Graph, Timeseries, and System Metadata stores + * in response to a given change type to reflect the changes present in the new aspect. + * + * @param event the change event to be processed. + */ + private void handleUpdateChangeEvent( + @Nonnull final OperationContext opContext, @Nonnull final MCLItem event) throws IOException { + + final AspectSpec aspectSpec = event.getAspectSpec(); + final Urn urn = event.getUrn(); + + RecordTemplate aspect = event.getRecordTemplate(); + RecordTemplate previousAspect = event.getPreviousRecordTemplate(); + + // For all aspects, attempt to update Graph + SystemMetadata systemMetadata = event.getSystemMetadata(); + if (graphDiffMode + && !(graphService instanceof DgraphGraphService) + && (systemMetadata == null + || systemMetadata.getProperties() == null + || !Boolean.parseBoolean(systemMetadata.getProperties().get(FORCE_INDEXING_KEY)))) { + updateGraphServiceDiff(urn, aspectSpec, previousAspect, aspect, event.getMetadataChangeLog()); + } else { + updateGraphService(opContext, urn, aspectSpec, aspect, event.getMetadataChangeLog()); + } + } + + /** + * This very important method processes {@link MetadataChangeLog} deletion events to cleanup the + * Metadata Graph when an aspect or entity is removed. + * + *

In particular, it handles updating the Search, Graph, Timeseries, and System Metadata stores + * to reflect the deletion of a particular aspect. + * + *

Note that if an entity's key aspect is deleted, the entire entity will be purged from + * search, graph, timeseries, etc. + * + * @param event the change event to be processed. + */ + private void handleDeleteChangeEvent( + @Nonnull final OperationContext opContext, @Nonnull final MCLItem event) { + + final EntitySpec entitySpec = event.getEntitySpec(); + final Urn urn = event.getUrn(); + + AspectSpec aspectSpec = entitySpec.getAspectSpec(event.getAspectName()); + if (aspectSpec == null) { + throw new RuntimeException( + String.format( + "Failed to retrieve Aspect Spec for entity with name %s, aspect with name %s. Cannot update indices for MCL.", + urn.getEntityType(), event.getAspectName())); + } + + RecordTemplate aspect = event.getRecordTemplate(); + Boolean isDeletingKey = event.getAspectName().equals(entitySpec.getKeyAspectName()); + + if (!aspectSpec.isTimeseries()) { + deleteGraphData( + opContext, urn, aspectSpec, aspect, isDeletingKey, event.getMetadataChangeLog()); + } + } + + // TODO: remove this method once we implement sourceOverride when creating graph edges + private void updateFineGrainedEdgesAndRelationships( + Urn entity, + FineGrainedLineageArray fineGrainedLineageArray, + List edgesToAdd, + HashMap> urnToRelationshipTypesBeingAdded) { + if (fineGrainedLineageArray != null) { + for (FineGrainedLineage fineGrainedLineage : fineGrainedLineageArray) { + if (!fineGrainedLineage.hasDownstreams() || !fineGrainedLineage.hasUpstreams()) { + break; + } + // Fine grained lineage array is present either on datajob (datajob input/output) or dataset + // We set the datajob as the viaEntity in scenario 1, and the query (if present) as the + // viaEntity in scenario 2 + Urn viaEntity = + entity.getEntityType().equals("dataJob") ? 
entity : fineGrainedLineage.getQuery(); + // for every downstream, create an edge with each of the upstreams + for (Urn downstream : fineGrainedLineage.getDownstreams()) { + for (Urn upstream : fineGrainedLineage.getUpstreams()) { + // TODO: add edges uniformly across aspects + edgesToAdd.add( + new Edge( + downstream, + upstream, + DOWNSTREAM_OF, + null, + null, + null, + null, + null, + entity, + viaEntity)); + Set relationshipTypes = + urnToRelationshipTypesBeingAdded.getOrDefault(downstream, new HashSet<>()); + relationshipTypes.add(DOWNSTREAM_OF); + urnToRelationshipTypesBeingAdded.put(downstream, relationshipTypes); + } + } + } + } + } + + // TODO: remove this method once we implement sourceOverride and update inputFields aspect + private void updateInputFieldEdgesAndRelationships( + @Nonnull final Urn urn, + @Nonnull final InputFields inputFields, + @Nonnull final List edgesToAdd, + @Nonnull final HashMap> urnToRelationshipTypesBeingAdded) { + if (inputFields.hasFields()) { + for (final InputField field : inputFields.getFields()) { + if (field.hasSchemaFieldUrn() + && field.hasSchemaField() + && field.getSchemaField().hasFieldPath()) { + final Urn sourceFieldUrn = + SchemaFieldUtils.generateSchemaFieldUrn(urn, field.getSchemaField().getFieldPath()); + // TODO: add edges uniformly across aspects + edgesToAdd.add( + new Edge( + sourceFieldUrn, + field.getSchemaFieldUrn(), + DOWNSTREAM_OF, + null, + null, + null, + null, + null)); + final Set relationshipTypes = + urnToRelationshipTypesBeingAdded.getOrDefault(sourceFieldUrn, new HashSet<>()); + relationshipTypes.add(DOWNSTREAM_OF); + urnToRelationshipTypesBeingAdded.put(sourceFieldUrn, relationshipTypes); + } + } + } + } + + private Pair, HashMap>> getEdgesAndRelationshipTypesFromAspect( + @Nonnull final Urn urn, + @Nonnull final AspectSpec aspectSpec, + @Nonnull final RecordTemplate aspect, + @Nonnull final MetadataChangeLog event, + final boolean isNewAspectVersion) { + final List edgesToAdd = new 
ArrayList<>(); + final HashMap> urnToRelationshipTypesBeingAdded = new HashMap<>(); + + // we need to manually set schemaField <-> schemaField edges for fineGrainedLineage and + // inputFields + // since @Relationship only links between the parent entity urn and something else. + if (aspectSpec.getName().equals(Constants.UPSTREAM_LINEAGE_ASPECT_NAME)) { + UpstreamLineage upstreamLineage = new UpstreamLineage(aspect.data()); + updateFineGrainedEdgesAndRelationships( + urn, + upstreamLineage.getFineGrainedLineages(), + edgesToAdd, + urnToRelationshipTypesBeingAdded); + } else if (aspectSpec.getName().equals(Constants.INPUT_FIELDS_ASPECT_NAME)) { + final InputFields inputFields = new InputFields(aspect.data()); + updateInputFieldEdgesAndRelationships( + urn, inputFields, edgesToAdd, urnToRelationshipTypesBeingAdded); + } else if (aspectSpec.getName().equals(Constants.DATA_JOB_INPUT_OUTPUT_ASPECT_NAME)) { + DataJobInputOutput dataJobInputOutput = new DataJobInputOutput(aspect.data()); + updateFineGrainedEdgesAndRelationships( + urn, + dataJobInputOutput.getFineGrainedLineages(), + edgesToAdd, + urnToRelationshipTypesBeingAdded); + } + + Map> extractedFields = + FieldExtractor.extractFields(aspect, aspectSpec.getRelationshipFieldSpecs()); + + for (Map.Entry> entry : extractedFields.entrySet()) { + Set relationshipTypes = + urnToRelationshipTypesBeingAdded.getOrDefault(urn, new HashSet<>()); + relationshipTypes.add(entry.getKey().getRelationshipName()); + urnToRelationshipTypesBeingAdded.put(urn, relationshipTypes); + final List newEdges = + GraphIndexUtils.extractGraphEdges(entry, aspect, urn, event, isNewAspectVersion); + edgesToAdd.addAll(newEdges); + } + return Pair.of(edgesToAdd, urnToRelationshipTypesBeingAdded); + } + + /** Process snapshot and update graph index */ + private void updateGraphService( + @Nonnull final OperationContext opContext, + @Nonnull final Urn urn, + @Nonnull final AspectSpec aspectSpec, + @Nonnull final RecordTemplate aspect, + @Nonnull 
final MetadataChangeLog event) { + Pair, HashMap>> edgeAndRelationTypes = + getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect, event, true); + + final List edgesToAdd = edgeAndRelationTypes.getFirst(); + final HashMap> urnToRelationshipTypesBeingAdded = + edgeAndRelationTypes.getSecond(); + + log.debug("Here's the relationship types found {}", urnToRelationshipTypesBeingAdded); + if (!urnToRelationshipTypesBeingAdded.isEmpty()) { + for (Map.Entry> entry : urnToRelationshipTypesBeingAdded.entrySet()) { + graphService.removeEdgesFromNode( + opContext, + entry.getKey(), + new ArrayList<>(entry.getValue()), + newRelationshipFilter( + new Filter().setOr(new ConjunctiveCriterionArray()), + RelationshipDirection.OUTGOING)); + } + edgesToAdd.forEach(graphService::addEdge); + } + } + + private void updateGraphServiceDiff( + @Nonnull final Urn urn, + @Nonnull final AspectSpec aspectSpec, + @Nullable final RecordTemplate oldAspect, + @Nonnull final RecordTemplate newAspect, + @Nonnull final MetadataChangeLog event) { + Pair, HashMap>> oldEdgeAndRelationTypes = null; + if (oldAspect != null) { + oldEdgeAndRelationTypes = + getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, oldAspect, event, false); + } + + final List oldEdges = + oldEdgeAndRelationTypes != null + ? 
oldEdgeAndRelationTypes.getFirst() + : Collections.emptyList(); + final Set oldEdgeSet = new HashSet<>(oldEdges); + + Pair, HashMap>> newEdgeAndRelationTypes = + getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, newAspect, event, true); + + final List newEdges = newEdgeAndRelationTypes.getFirst(); + final Set newEdgeSet = new HashSet<>(newEdges); + + // Edges to add + final List additiveDifference = + newEdgeSet.stream().filter(edge -> !oldEdgeSet.contains(edge)).collect(Collectors.toList()); + + // Edges to remove + final List subtractiveDifference = + oldEdgeSet.stream().filter(edge -> !newEdgeSet.contains(edge)).collect(Collectors.toList()); + + // Edges to update + final List mergedEdges = getMergedEdges(oldEdgeSet, newEdgeSet); + + // Remove any old edges that no longer exist first + if (subtractiveDifference.size() > 0) { + log.debug("Removing edges: {}", subtractiveDifference); + subtractiveDifference.forEach(graphService::removeEdge); + } + + // Then add new edges + if (additiveDifference.size() > 0) { + log.debug("Adding edges: {}", additiveDifference); + additiveDifference.forEach(graphService::addEdge); + } + + // Then update existing edges + if (mergedEdges.size() > 0) { + log.debug("Updating edges: {}", mergedEdges); + mergedEdges.forEach(graphService::upsertEdge); + } + } + + private static List getMergedEdges(final Set oldEdgeSet, final Set newEdgeSet) { + final Map oldEdgesMap = + oldEdgeSet.stream() + .map(edge -> Pair.of(edge.hashCode(), edge)) + .collect(Collectors.toMap(Pair::getFirst, Pair::getSecond)); + + final List mergedEdges = new ArrayList<>(); + if (!oldEdgesMap.isEmpty()) { + for (Edge newEdge : newEdgeSet) { + if (oldEdgesMap.containsKey(newEdge.hashCode())) { + final Edge oldEdge = oldEdgesMap.get(newEdge.hashCode()); + final Edge mergedEdge = GraphIndexUtils.mergeEdges(oldEdge, newEdge); + mergedEdges.add(mergedEdge); + } + } + } + + return mergedEdges; + } + + private void deleteGraphData( + @Nonnull final OperationContext 
opContext, + @Nonnull final Urn urn, + @Nonnull final AspectSpec aspectSpec, + @Nonnull final RecordTemplate aspect, + @Nonnull final Boolean isKeyAspect, + @Nonnull final MetadataChangeLog event) { + if (isKeyAspect) { + graphService.removeNode(opContext, urn); + return; + } + + Pair, HashMap>> edgeAndRelationTypes = + getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect, event, true); + + final HashMap> urnToRelationshipTypesBeingAdded = + edgeAndRelationTypes.getSecond(); + if (urnToRelationshipTypesBeingAdded.size() > 0) { + for (Map.Entry> entry : urnToRelationshipTypesBeingAdded.entrySet()) { + graphService.removeEdgesFromNode( + opContext, + entry.getKey(), + new ArrayList<>(entry.getValue()), + createRelationshipFilter( + new Filter().setOr(new ConjunctiveCriterionArray()), + RelationshipDirection.OUTGOING)); + } + } + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java index 2274b0a7c1cd8..3795fd19316b1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java @@ -2,59 +2,37 @@ import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.search.transformer.SearchDocumentTransformer.withSystemCreated; -import static com.linkedin.metadata.search.utils.QueryUtils.*; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; -import com.linkedin.common.InputField; -import com.linkedin.common.InputFields; import com.linkedin.common.Status; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; -import 
com.linkedin.datajob.DataJobInputOutput; -import com.linkedin.dataset.FineGrainedLineage; -import com.linkedin.dataset.FineGrainedLineageArray; -import com.linkedin.dataset.UpstreamLineage; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.MCLItem; -import com.linkedin.metadata.aspect.models.graph.Edge; import com.linkedin.metadata.entity.SearchIndicesService; import com.linkedin.metadata.entity.ebean.batch.MCLItemImpl; -import com.linkedin.metadata.graph.GraphIndexUtils; -import com.linkedin.metadata.graph.GraphService; -import com.linkedin.metadata.graph.dgraph.DgraphGraphService; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; -import com.linkedin.metadata.models.RelationshipFieldSpec; -import com.linkedin.metadata.models.extractor.FieldExtractor; -import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Filter; -import com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders; import com.linkedin.metadata.search.transformer.SearchDocumentTransformer; import com.linkedin.metadata.systemmetadata.SystemMetadataService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.metadata.timeseries.transformer.TimeseriesAspectTransformer; -import com.linkedin.metadata.utils.SchemaFieldUtils; import com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.SystemMetadata; import com.linkedin.structured.StructuredPropertyDefinition; -import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; -import java.util.ArrayList; -import 
java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; @@ -63,33 +41,25 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; -import javax.annotation.Nullable; +import lombok.Getter; import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Value; @Slf4j public class UpdateIndicesService implements SearchIndicesService { - private static final String DOWNSTREAM_OF = "DownstreamOf"; - - private final GraphService _graphService; - private final EntitySearchService _entitySearchService; - private final TimeseriesAspectService _timeseriesAspectService; - private final SystemMetadataService _systemMetadataService; - private final SearchDocumentTransformer _searchDocumentTransformer; - private final EntityIndexBuilders _entityIndexBuilders; - @Nonnull private final String idHashAlgo; - @Value("${featureFlags.graphServiceDiffModeEnabled:true}") - private boolean _graphDiffMode; + @VisibleForTesting @Getter private final UpdateGraphIndicesService updateGraphIndicesService; + private final EntitySearchService entitySearchService; + private final TimeseriesAspectService timeseriesAspectService; + private final SystemMetadataService systemMetadataService; + private final SearchDocumentTransformer searchDocumentTransformer; + private final EntityIndexBuilders entityIndexBuilders; + @Nonnull private final String idHashAlgo; - @Value("${featureFlags.searchServiceDiffModeEnabled:true}") - private boolean _searchDiffMode; + @Getter private final boolean searchDiffMode; - @Value("${structuredProperties.enabled}") - private boolean _structuredPropertiesHookEnabled; + @Getter private final boolean structuredPropertiesHookEnabled; - @Value("${structuredProperties.writeEnabled}") - private boolean _structuredPropertiesWriteEnabled; + @Getter private final boolean structuredPropertiesWriteEnabled; private static 
final Set UPDATE_CHANGE_TYPES = ImmutableSet.of( @@ -99,31 +69,48 @@ public class UpdateIndicesService implements SearchIndicesService { ChangeType.RESTATE, ChangeType.PATCH); - @VisibleForTesting - public void setGraphDiffMode(boolean graphDiffMode) { - _graphDiffMode = graphDiffMode; - } - - @VisibleForTesting - public void setSearchDiffMode(boolean searchDiffMode) { - _searchDiffMode = searchDiffMode; + public UpdateIndicesService( + UpdateGraphIndicesService updateGraphIndicesService, + EntitySearchService entitySearchService, + TimeseriesAspectService timeseriesAspectService, + SystemMetadataService systemMetadataService, + SearchDocumentTransformer searchDocumentTransformer, + EntityIndexBuilders entityIndexBuilders, + @Nonnull String idHashAlgo) { + this( + updateGraphIndicesService, + entitySearchService, + timeseriesAspectService, + systemMetadataService, + searchDocumentTransformer, + entityIndexBuilders, + idHashAlgo, + true, + true, + true); } public UpdateIndicesService( - GraphService graphService, + UpdateGraphIndicesService updateGraphIndicesService, EntitySearchService entitySearchService, TimeseriesAspectService timeseriesAspectService, SystemMetadataService systemMetadataService, SearchDocumentTransformer searchDocumentTransformer, EntityIndexBuilders entityIndexBuilders, - @Nonnull String idHashAlgo) { - _graphService = graphService; - _entitySearchService = entitySearchService; - _timeseriesAspectService = timeseriesAspectService; - _systemMetadataService = systemMetadataService; - _searchDocumentTransformer = searchDocumentTransformer; - _entityIndexBuilders = entityIndexBuilders; + @Nonnull String idHashAlgo, + boolean searchDiffMode, + boolean structuredPropertiesHookEnabled, + boolean structuredPropertiesWriteEnabled) { + this.updateGraphIndicesService = updateGraphIndicesService; + this.entitySearchService = entitySearchService; + this.timeseriesAspectService = timeseriesAspectService; + this.systemMetadataService = systemMetadataService; 
+ this.searchDocumentTransformer = searchDocumentTransformer; + this.entityIndexBuilders = entityIndexBuilders; this.idHashAlgo = idHashAlgo; + this.searchDiffMode = searchDiffMode; + this.structuredPropertiesHookEnabled = structuredPropertiesHookEnabled; + this.structuredPropertiesWriteEnabled = structuredPropertiesWriteEnabled; } @Override @@ -144,6 +131,9 @@ public void handleChangeEvent( } else if (hookEvent.getChangeType() == ChangeType.DELETE) { handleDeleteChangeEvent(opContext, mclItem); } + + // graph update + updateGraphIndicesService.handleChangeEvent(opContext, event); } } catch (IOException e) { throw new RuntimeException(e); @@ -191,18 +181,6 @@ private void handleUpdateChangeEvent( // Step 2. For all aspects, attempt to update Search updateSearchService(opContext, event); - - // Step 3. For all aspects, attempt to update Graph - SystemMetadata systemMetadata = event.getSystemMetadata(); - if (_graphDiffMode - && !(_graphService instanceof DgraphGraphService) - && (systemMetadata == null - || systemMetadata.getProperties() == null - || !Boolean.parseBoolean(systemMetadata.getProperties().get(FORCE_INDEXING_KEY)))) { - updateGraphServiceDiff(urn, aspectSpec, previousAspect, aspect, event.getMetadataChangeLog()); - } else { - updateGraphService(urn, aspectSpec, aspect, event.getMetadataChangeLog()); - } } public void updateIndexMappings( @@ -210,9 +188,8 @@ public void updateIndexMappings( EntitySpec entitySpec, AspectSpec aspectSpec, RecordTemplate newValue, - RecordTemplate oldValue) - throws IOException { - if (_structuredPropertiesHookEnabled + RecordTemplate oldValue) { + if (structuredPropertiesHookEnabled && STRUCTURED_PROPERTY_ENTITY_NAME.equals(entitySpec.getName()) && STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME.equals(aspectSpec.getName())) { @@ -228,7 +205,7 @@ public void updateIndexMappings( newDefinition.getEntityTypes().removeAll(oldEntityTypes); if (newDefinition.getEntityTypes().size() > 0) { - _entityIndexBuilders + entityIndexBuilders 
.buildReindexConfigsWithNewStructProp(urn, newDefinition) .forEach( reindexState -> { @@ -237,7 +214,7 @@ public void updateIndexMappings( "Applying new structured property {} to index {}", newDefinition, reindexState.name()); - _entityIndexBuilders.getIndexBuilder().applyMappings(reindexState, false); + entityIndexBuilders.getIndexBuilder().applyMappings(reindexState, false); } catch (IOException e) { throw new RuntimeException(e); } @@ -277,236 +254,10 @@ private void handleDeleteChangeEvent( if (!aspectSpec.isTimeseries()) { deleteSystemMetadata(urn, aspectSpec, isDeletingKey); - deleteGraphData(urn, aspectSpec, aspect, isDeletingKey, event.getMetadataChangeLog()); deleteSearchData(opContext, urn, entitySpec.getName(), aspectSpec, aspect, isDeletingKey); } } - // TODO: remove this method once we implement sourceOverride when creating graph edges - private void updateFineGrainedEdgesAndRelationships( - Urn entity, - FineGrainedLineageArray fineGrainedLineageArray, - List edgesToAdd, - HashMap> urnToRelationshipTypesBeingAdded) { - if (fineGrainedLineageArray != null) { - for (FineGrainedLineage fineGrainedLineage : fineGrainedLineageArray) { - if (!fineGrainedLineage.hasDownstreams() || !fineGrainedLineage.hasUpstreams()) { - break; - } - // Fine grained lineage array is present either on datajob (datajob input/output) or dataset - // We set the datajob as the viaEntity in scenario 1, and the query (if present) as the - // viaEntity in scenario 2 - Urn viaEntity = - entity.getEntityType().equals("dataJob") ? 
entity : fineGrainedLineage.getQuery(); - // for every downstream, create an edge with each of the upstreams - for (Urn downstream : fineGrainedLineage.getDownstreams()) { - for (Urn upstream : fineGrainedLineage.getUpstreams()) { - // TODO: add edges uniformly across aspects - edgesToAdd.add( - new Edge( - downstream, - upstream, - DOWNSTREAM_OF, - null, - null, - null, - null, - null, - entity, - viaEntity)); - Set relationshipTypes = - urnToRelationshipTypesBeingAdded.getOrDefault(downstream, new HashSet<>()); - relationshipTypes.add(DOWNSTREAM_OF); - urnToRelationshipTypesBeingAdded.put(downstream, relationshipTypes); - } - } - } - } - } - - // TODO: remove this method once we implement sourceOverride and update inputFields aspect - private void updateInputFieldEdgesAndRelationships( - @Nonnull final Urn urn, - @Nonnull final InputFields inputFields, - @Nonnull final List edgesToAdd, - @Nonnull final HashMap> urnToRelationshipTypesBeingAdded) { - if (inputFields.hasFields()) { - for (final InputField field : inputFields.getFields()) { - if (field.hasSchemaFieldUrn() - && field.hasSchemaField() - && field.getSchemaField().hasFieldPath()) { - final Urn sourceFieldUrn = - SchemaFieldUtils.generateSchemaFieldUrn(urn, field.getSchemaField().getFieldPath()); - // TODO: add edges uniformly across aspects - edgesToAdd.add( - new Edge( - sourceFieldUrn, - field.getSchemaFieldUrn(), - DOWNSTREAM_OF, - null, - null, - null, - null, - null)); - final Set relationshipTypes = - urnToRelationshipTypesBeingAdded.getOrDefault(sourceFieldUrn, new HashSet<>()); - relationshipTypes.add(DOWNSTREAM_OF); - urnToRelationshipTypesBeingAdded.put(sourceFieldUrn, relationshipTypes); - } - } - } - } - - private Pair, HashMap>> getEdgesAndRelationshipTypesFromAspect( - @Nonnull final Urn urn, - @Nonnull final AspectSpec aspectSpec, - @Nonnull final RecordTemplate aspect, - @Nonnull final MetadataChangeLog event, - final boolean isNewAspectVersion) { - final List edgesToAdd = new 
ArrayList<>(); - final HashMap> urnToRelationshipTypesBeingAdded = new HashMap<>(); - - // we need to manually set schemaField <-> schemaField edges for fineGrainedLineage and - // inputFields - // since @Relationship only links between the parent entity urn and something else. - if (aspectSpec.getName().equals(Constants.UPSTREAM_LINEAGE_ASPECT_NAME)) { - UpstreamLineage upstreamLineage = new UpstreamLineage(aspect.data()); - updateFineGrainedEdgesAndRelationships( - urn, - upstreamLineage.getFineGrainedLineages(), - edgesToAdd, - urnToRelationshipTypesBeingAdded); - } else if (aspectSpec.getName().equals(Constants.INPUT_FIELDS_ASPECT_NAME)) { - final InputFields inputFields = new InputFields(aspect.data()); - updateInputFieldEdgesAndRelationships( - urn, inputFields, edgesToAdd, urnToRelationshipTypesBeingAdded); - } else if (aspectSpec.getName().equals(Constants.DATA_JOB_INPUT_OUTPUT_ASPECT_NAME)) { - DataJobInputOutput dataJobInputOutput = new DataJobInputOutput(aspect.data()); - updateFineGrainedEdgesAndRelationships( - urn, - dataJobInputOutput.getFineGrainedLineages(), - edgesToAdd, - urnToRelationshipTypesBeingAdded); - } - - Map> extractedFields = - FieldExtractor.extractFields(aspect, aspectSpec.getRelationshipFieldSpecs()); - - for (Map.Entry> entry : extractedFields.entrySet()) { - Set relationshipTypes = - urnToRelationshipTypesBeingAdded.getOrDefault(urn, new HashSet<>()); - relationshipTypes.add(entry.getKey().getRelationshipName()); - urnToRelationshipTypesBeingAdded.put(urn, relationshipTypes); - final List newEdges = - GraphIndexUtils.extractGraphEdges(entry, aspect, urn, event, isNewAspectVersion); - edgesToAdd.addAll(newEdges); - } - return Pair.of(edgesToAdd, urnToRelationshipTypesBeingAdded); - } - - /** Process snapshot and update graph index */ - private void updateGraphService( - @Nonnull final Urn urn, - @Nonnull final AspectSpec aspectSpec, - @Nonnull final RecordTemplate aspect, - @Nonnull final MetadataChangeLog event) { - Pair, 
HashMap>> edgeAndRelationTypes = - getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect, event, true); - - final List edgesToAdd = edgeAndRelationTypes.getFirst(); - final HashMap> urnToRelationshipTypesBeingAdded = - edgeAndRelationTypes.getSecond(); - - log.debug("Here's the relationship types found {}", urnToRelationshipTypesBeingAdded); - if (!urnToRelationshipTypesBeingAdded.isEmpty()) { - for (Map.Entry> entry : urnToRelationshipTypesBeingAdded.entrySet()) { - _graphService.removeEdgesFromNode( - entry.getKey(), - new ArrayList<>(entry.getValue()), - newRelationshipFilter( - new Filter().setOr(new ConjunctiveCriterionArray()), - RelationshipDirection.OUTGOING)); - } - edgesToAdd.forEach(_graphService::addEdge); - } - } - - private void updateGraphServiceDiff( - @Nonnull final Urn urn, - @Nonnull final AspectSpec aspectSpec, - @Nullable final RecordTemplate oldAspect, - @Nonnull final RecordTemplate newAspect, - @Nonnull final MetadataChangeLog event) { - Pair, HashMap>> oldEdgeAndRelationTypes = null; - if (oldAspect != null) { - oldEdgeAndRelationTypes = - getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, oldAspect, event, false); - } - - final List oldEdges = - oldEdgeAndRelationTypes != null - ? 
oldEdgeAndRelationTypes.getFirst() - : Collections.emptyList(); - final Set oldEdgeSet = new HashSet<>(oldEdges); - - Pair, HashMap>> newEdgeAndRelationTypes = - getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, newAspect, event, true); - - final List newEdges = newEdgeAndRelationTypes.getFirst(); - final Set newEdgeSet = new HashSet<>(newEdges); - - // Edges to add - final List additiveDifference = - newEdgeSet.stream().filter(edge -> !oldEdgeSet.contains(edge)).collect(Collectors.toList()); - - // Edges to remove - final List subtractiveDifference = - oldEdgeSet.stream().filter(edge -> !newEdgeSet.contains(edge)).collect(Collectors.toList()); - - // Edges to update - final List mergedEdges = getMergedEdges(oldEdgeSet, newEdgeSet); - - // Remove any old edges that no longer exist first - if (subtractiveDifference.size() > 0) { - log.debug("Removing edges: {}", subtractiveDifference); - subtractiveDifference.forEach(_graphService::removeEdge); - } - - // Then add new edges - if (additiveDifference.size() > 0) { - log.debug("Adding edges: {}", additiveDifference); - additiveDifference.forEach(_graphService::addEdge); - } - - // Then update existing edges - if (mergedEdges.size() > 0) { - log.debug("Updating edges: {}", mergedEdges); - mergedEdges.forEach(_graphService::upsertEdge); - } - } - - private static List getMergedEdges(final Set oldEdgeSet, final Set newEdgeSet) { - final Map oldEdgesMap = - oldEdgeSet.stream() - .map(edge -> Pair.of(edge.hashCode(), edge)) - .collect(Collectors.toMap(Pair::getFirst, Pair::getSecond)); - - final List mergedEdges = new ArrayList<>(); - if (!oldEdgesMap.isEmpty()) { - for (com.linkedin.metadata.aspect.models.graph.Edge newEdge : newEdgeSet) { - if (oldEdgesMap.containsKey(newEdge.hashCode())) { - final com.linkedin.metadata.aspect.models.graph.Edge oldEdge = - oldEdgesMap.get(newEdge.hashCode()); - final com.linkedin.metadata.aspect.models.graph.Edge mergedEdge = - GraphIndexUtils.mergeEdges(oldEdge, newEdge); - 
mergedEdges.add(mergedEdge); - } - } - } - - return mergedEdges; - } - /** Process snapshot and update search index */ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem event) { Urn urn = event.getUrn(); @@ -520,7 +271,7 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev Optional previousSearchDocument = Optional.empty(); try { searchDocument = - _searchDocumentTransformer + searchDocumentTransformer .transformAspect(opContext, urn, aspect, aspectSpec, false) .map( objectNode -> @@ -540,16 +291,16 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev return; } - final String docId = _entityIndexBuilders.getIndexConvention().getEntityDocumentId(urn); + final String docId = entityIndexBuilders.getIndexConvention().getEntityDocumentId(urn); - if (_searchDiffMode + if (searchDiffMode && (systemMetadata == null || systemMetadata.getProperties() == null || !Boolean.parseBoolean(systemMetadata.getProperties().get(FORCE_INDEXING_KEY)))) { if (previousAspect != null) { try { previousSearchDocument = - _searchDocumentTransformer.transformAspect( + searchDocumentTransformer.transformAspect( opContext, urn, previousAspect, aspectSpec, false); } catch (Exception e) { log.error( @@ -572,7 +323,7 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev searchDocument.get(), previousSearchDocument.orElse(null)) .toString(); - _entitySearchService.upsertDocument(opContext, entityName, finalDocument, docId); + entitySearchService.upsertDocument(opContext, entityName, finalDocument, docId); } /** Process snapshot and update time-series index */ @@ -597,18 +348,18 @@ private void updateTimeseriesFields( .entrySet() .forEach( document -> { - _timeseriesAspectService.upsertDocument( + timeseriesAspectService.upsertDocument( opContext, entityType, aspectName, document.getKey(), document.getValue()); }); } private void updateSystemMetadata( SystemMetadata systemMetadata, 
Urn urn, AspectSpec aspectSpec, RecordTemplate aspect) { - _systemMetadataService.insert(systemMetadata, urn.toString(), aspectSpec.getName()); + systemMetadataService.insert(systemMetadata, urn.toString(), aspectSpec.getName()); // If processing status aspect update all aspects for this urn to removed if (aspectSpec.getName().equals(Constants.STATUS_ASPECT_NAME)) { - _systemMetadataService.setDocStatus(urn.toString(), ((Status) aspect).isRemoved()); + systemMetadataService.setDocStatus(urn.toString(), ((Status) aspect).isRemoved()); } } @@ -616,41 +367,13 @@ private void deleteSystemMetadata(Urn urn, AspectSpec aspectSpec, Boolean isKeyA if (isKeyAspect) { // Delete all aspects log.debug(String.format("Deleting all system metadata for urn: %s", urn)); - _systemMetadataService.deleteUrn(urn.toString()); + systemMetadataService.deleteUrn(urn.toString()); } else { // Delete all aspects from system metadata service log.debug( String.format( "Deleting system metadata for urn: %s, aspect: %s", urn, aspectSpec.getName())); - _systemMetadataService.deleteAspect(urn.toString(), aspectSpec.getName()); - } - } - - private void deleteGraphData( - @Nonnull final Urn urn, - @Nonnull final AspectSpec aspectSpec, - @Nonnull final RecordTemplate aspect, - @Nonnull final Boolean isKeyAspect, - @Nonnull final MetadataChangeLog event) { - if (isKeyAspect) { - _graphService.removeNode(urn); - return; - } - - Pair, HashMap>> edgeAndRelationTypes = - getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect, event, true); - - final HashMap> urnToRelationshipTypesBeingAdded = - edgeAndRelationTypes.getSecond(); - if (urnToRelationshipTypesBeingAdded.size() > 0) { - for (Map.Entry> entry : urnToRelationshipTypesBeingAdded.entrySet()) { - _graphService.removeEdgesFromNode( - entry.getKey(), - new ArrayList<>(entry.getValue()), - createRelationshipFilter( - new Filter().setOr(new ConjunctiveCriterionArray()), - RelationshipDirection.OUTGOING)); - } + 
systemMetadataService.deleteAspect(urn.toString(), aspectSpec.getName()); } } @@ -670,14 +393,14 @@ private void deleteSearchData( } if (isKeyAspect) { - _entitySearchService.deleteDocument(opContext, entityName, docId); + entitySearchService.deleteDocument(opContext, entityName, docId); return; } Optional searchDocument; try { searchDocument = - _searchDocumentTransformer + searchDocumentTransformer .transformAspect(opContext, urn, aspect, aspectSpec, true) .map(Objects::toString); // TODO } catch (Exception e) { @@ -690,18 +413,6 @@ private void deleteSearchData( return; } - _entitySearchService.upsertDocument(opContext, entityName, searchDocument.get(), docId); - } - - private EntitySpec getEventEntitySpec( - @Nonnull OperationContext opContext, @Nonnull final MetadataChangeLog event) { - try { - return opContext.getEntityRegistry().getEntitySpec(event.getEntityType()); - } catch (IllegalArgumentException e) { - throw new RuntimeException( - String.format( - "Failed to retrieve Entity Spec for entity with name %s. 
Cannot update indices for MCL.", - event.getEntityType())); - } + entitySearchService.upsertDocument(opContext, entityName, searchDocument.get(), docId); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java index fe3608a2cf71d..d585ff1ce8383 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java @@ -37,6 +37,7 @@ import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; import java.sql.Timestamp; +import java.util.List; import java.util.Map; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -79,14 +80,15 @@ public void testDeleteUniqueRefGeneratesValidMCP() { Mockito.when( _graphService.findRelatedEntities( - null, - newFilter("urn", container.toString()), - null, - EMPTY_FILTER, - ImmutableList.of(), - newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING), - 0, - 10000)) + any(OperationContext.class), + nullable(List.class), + eq(newFilter("urn", container.toString())), + nullable(List.class), + eq(EMPTY_FILTER), + eq(ImmutableList.of()), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(0), + eq((10000)))) .thenReturn(mockRelatedEntities); final EntityResponse entityResponse = new EntityResponse(); @@ -195,14 +197,15 @@ public void testDeleteSearchReferences() { new RelatedEntitiesResult(0, 0, 0, ImmutableList.of()); Mockito.when( _graphService.findRelatedEntities( - null, - newFilter("urn", form.toString()), - null, - EMPTY_FILTER, - ImmutableList.of(), - newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING), - 0, - 10000)) + any(OperationContext.class), + nullable(List.class), + eq(newFilter("urn", form.toString())), + nullable(List.class), + 
eq(EMPTY_FILTER), + eq(ImmutableList.of()), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(0), + eq((10000)))) .thenReturn(mockRelatedEntities); final DeleteReferencesResponse response = @@ -249,14 +252,15 @@ public void testDeleteNoSearchReferences() { new RelatedEntitiesResult(0, 0, 0, ImmutableList.of()); Mockito.when( _graphService.findRelatedEntities( - null, - newFilter("urn", form.toString()), - null, - EMPTY_FILTER, - ImmutableList.of(), - newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING), - 0, - 10000)) + any(OperationContext.class), + nullable(List.class), + eq(newFilter("urn", form.toString())), + nullable(List.class), + eq(EMPTY_FILTER), + eq(ImmutableList.of()), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(0), + eq((10000)))) .thenReturn(mockRelatedEntities); final DeleteReferencesResponse response = @@ -308,14 +312,15 @@ public void testDeleteSearchReferencesDryRun() { new RelatedEntitiesResult(0, 0, 0, ImmutableList.of()); Mockito.when( _graphService.findRelatedEntities( - null, - newFilter("urn", form.toString()), - null, - EMPTY_FILTER, - ImmutableList.of(), - newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING), - 0, - 10000)) + any(OperationContext.class), + nullable(List.class), + eq(newFilter("urn", form.toString())), + nullable(List.class), + eq(EMPTY_FILTER), + eq(ImmutableList.of()), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(0), + eq((10000)))) .thenReturn(mockRelatedEntities); final DeleteReferencesResponse response = diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java index 5d9a5079f2a3b..64ab95b5c6843 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java @@ 
-11,16 +11,16 @@ import com.linkedin.common.urn.DataJobUrn; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.metadata.aspect.models.graph.Edge; import com.linkedin.metadata.aspect.models.graph.RelatedEntity; import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.metadata.graph.dgraph.DgraphGraphService; import com.linkedin.metadata.graph.neo4j.Neo4jGraphService; -import com.linkedin.metadata.query.LineageFlags; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.query.filter.RelationshipFilter; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; import java.time.Duration; import java.util.ArrayList; @@ -47,7 +47,6 @@ import javax.annotation.Nullable; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.Assert; -import org.testng.annotations.BeforeMethod; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -273,20 +272,16 @@ public int compare(RelatedEntity left, RelatedEntity right) { /** Any source and destination type value. */ protected static @Nullable List anyType = null; - protected final GraphQueryConfiguration _graphQueryConfiguration = getGraphQueryConfiguration(); + protected static final GraphQueryConfiguration _graphQueryConfiguration = + getGraphQueryConfiguration(); + protected static final OperationContext operationContext = + TestOperationContexts.systemContextNoSearchAuthorization(); /** Timeout used to test concurrent ops in doTestConcurrentOp. 
*/ protected Duration getTestConcurrentOpTimeout() { return Duration.ofMinutes(1); } - @BeforeMethod - public void disableAssert() { - PathSpecBasedSchemaAnnotationVisitor.class - .getClassLoader() - .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); - } - @Test public void testStaticUrns() { assertNotNull(dataset1Urn); @@ -502,6 +497,7 @@ public void testAddEdge( RelatedEntitiesResult relatedOutgoing = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -514,6 +510,7 @@ public void testAddEdge( RelatedEntitiesResult relatedIncoming = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -531,6 +528,7 @@ public void testPopulatedGraphService() throws Exception { RelatedEntitiesResult relatedOutgoingEntitiesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -549,6 +547,7 @@ public void testPopulatedGraphService() throws Exception { downstreamOfSchemaFieldTwoVia, downstreamOfSchemaFieldTwo)); RelatedEntitiesResult relatedIncomingEntitiesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -574,13 +573,13 @@ public void testPopulatedGraphService() throws Exception { downstreamOfSchemaFieldOne)); EntityLineageResult viaNodeResult = service.getLineage( + operationContext, schemaFieldUrnOne, LineageDirection.UPSTREAM, new GraphFilters(List.of("schemaField")), 0, 1000, - 100, - null); + 100); // Multi-path enabled assertEquals(viaNodeResult.getRelationships().size(), 2); // First one is via node @@ -589,13 +588,13 @@ public void testPopulatedGraphService() throws Exception { EntityLineageResult viaNodeResultNoMulti = getGraphService(false) .getLineage( + operationContext, schemaFieldUrnOne, LineageDirection.UPSTREAM, new GraphFilters(List.of("schemaField")), 0, 1000, - 100, - null); + 100); // Multi-path disabled, still has two because via flow creates both edges in response 
assertEquals(viaNodeResultNoMulti.getRelationships().size(), 2); @@ -612,12 +611,12 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { GraphService service = getLineagePopulatedGraphService(); EntityLineageResult upstreamLineage = - service.getLineage(dataset1Urn, LineageDirection.UPSTREAM, 0, 1000, 1); + service.getLineage(operationContext, dataset1Urn, LineageDirection.UPSTREAM, 0, 1000, 1); assertEquals(upstreamLineage.getTotal().intValue(), 0); assertEquals(upstreamLineage.getRelationships().size(), 0); EntityLineageResult downstreamLineage = - service.getLineage(dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 1); + service.getLineage(operationContext, dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 1); assertEquals(downstreamLineage.getTotal().intValue(), 3); assertEquals(downstreamLineage.getRelationships().size(), 3); Map relationships = @@ -630,7 +629,8 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { assertTrue(relationships.containsKey(dataJobTwoUrn)); assertEquals(relationships.get(dataJobTwoUrn).getType(), consumes); - upstreamLineage = service.getLineage(dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 1); + upstreamLineage = + service.getLineage(operationContext, dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 1); assertEquals(upstreamLineage.getTotal().intValue(), 2); assertEquals(upstreamLineage.getRelationships().size(), 2); relationships = @@ -641,11 +641,13 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { assertTrue(relationships.containsKey(dataJobOneUrn)); assertEquals(relationships.get(dataJobOneUrn).getType(), produces); - downstreamLineage = service.getLineage(dataset3Urn, LineageDirection.DOWNSTREAM, 0, 1000, 1); + downstreamLineage = + service.getLineage(operationContext, dataset3Urn, LineageDirection.DOWNSTREAM, 0, 1000, 1); assertEquals(downstreamLineage.getTotal().intValue(), 0); assertEquals(downstreamLineage.getRelationships().size(), 0); - upstreamLineage = 
service.getLineage(dataJobOneUrn, LineageDirection.UPSTREAM, 0, 1000, 1); + upstreamLineage = + service.getLineage(operationContext, dataJobOneUrn, LineageDirection.UPSTREAM, 0, 1000, 1); assertEquals(upstreamLineage.getTotal().intValue(), 2); assertEquals(upstreamLineage.getRelationships().size(), 2); relationships = @@ -656,7 +658,9 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { assertTrue(relationships.containsKey(dataset2Urn)); assertEquals(relationships.get(dataset2Urn).getType(), consumes); - downstreamLineage = service.getLineage(dataJobOneUrn, LineageDirection.DOWNSTREAM, 0, 1000, 1); + downstreamLineage = + service.getLineage( + operationContext, dataJobOneUrn, LineageDirection.DOWNSTREAM, 0, 1000, 1); assertEquals(downstreamLineage.getTotal().intValue(), 3); assertEquals(downstreamLineage.getRelationships().size(), 3); relationships = @@ -834,6 +838,7 @@ private void doTestFindRelatedEntities( RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, anyType, sourceEntityFilter, anyType, @@ -1118,6 +1123,7 @@ private void doTestFindRelatedEntities( RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, sourceType, EMPTY_FILTER, destinationType, @@ -1139,6 +1145,7 @@ private void doTestFindRelatedEntitiesEntityType( @Nonnull RelatedEntity... 
expectedEntities) { RelatedEntitiesResult actualEntities = service.findRelatedEntities( + operationContext, sourceType, EMPTY_FILTER, destinationType, @@ -1244,6 +1251,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allOutgoingRelatedEntities = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1263,6 +1271,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allIncomingRelatedEntities = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1289,6 +1298,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allUnknownRelationshipTypeRelatedEntities = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1301,6 +1311,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult someUnknownRelationshipTypeRelatedEntities = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1325,6 +1336,7 @@ public void testFindRelatedEntitiesNoRelationshipTypes() throws Exception { RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1340,6 +1352,7 @@ public void testFindRelatedEntitiesNoRelationshipTypes() throws Exception { // did not get any related urns? 
RelatedEntitiesResult relatedEntitiesAll = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1358,6 +1371,7 @@ public void testFindRelatedEntitiesAllFilters() throws Exception { RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType), newFilter("urn", dataset1UrnString), ImmutableList.of(userType), @@ -1371,6 +1385,7 @@ public void testFindRelatedEntitiesAllFilters() throws Exception { relatedEntities = service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType), newFilter("urn", dataset1UrnString), ImmutableList.of(userType), @@ -1389,6 +1404,7 @@ public void testFindRelatedEntitiesMultipleEntityTypes() throws Exception { RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType, userType), newFilter("urn", dataset1UrnString), ImmutableList.of(datasetType, userType), @@ -1402,6 +1418,7 @@ public void testFindRelatedEntitiesMultipleEntityTypes() throws Exception { relatedEntities = service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType, userType), newFilter("urn", dataset1UrnString), ImmutableList.of(datasetType, userType), @@ -1421,6 +1438,7 @@ public void testFindRelatedEntitiesOffsetAndCount() throws Exception { // populated graph asserted in testPopulatedGraphService RelatedEntitiesResult allRelatedEntities = service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType), EMPTY_FILTER, anyType, @@ -1436,6 +1454,7 @@ public void testFindRelatedEntitiesOffsetAndCount() throws Exception { idx -> individualRelatedEntities.addAll( service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType), EMPTY_FILTER, anyType, @@ -1540,6 +1559,7 @@ public void testRemoveEdgesFromNode( RelatedEntitiesResult actualOutgoingRelatedUrnsBeforeRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), 
anyType, @@ -1550,6 +1570,7 @@ public void testRemoveEdgesFromNode( 100); RelatedEntitiesResult actualIncomingRelatedUrnsBeforeRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1566,6 +1587,7 @@ public void testRemoveEdgesFromNode( // we expect these do not change RelatedEntitiesResult relatedEntitiesOfOtherOutgoingRelationTypesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1576,6 +1598,7 @@ public void testRemoveEdgesFromNode( 100); RelatedEntitiesResult relatedEntitiesOfOtherIncomingRelationTypesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1585,11 +1608,13 @@ public void testRemoveEdgesFromNode( 0, 100); - service.removeEdgesFromNode(nodeToRemoveFrom, relationTypes, relationshipFilter); + service.removeEdgesFromNode( + operationContext, nodeToRemoveFrom, relationTypes, relationshipFilter); syncAfterWrite(); RelatedEntitiesResult actualOutgoingRelatedUrnsAfterRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1600,6 +1625,7 @@ public void testRemoveEdgesFromNode( 100); RelatedEntitiesResult actualIncomingRelatedUrnsAfterRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1616,6 +1642,7 @@ public void testRemoveEdgesFromNode( // assert these did not change RelatedEntitiesResult relatedEntitiesOfOtherOutgoingRelationTypesAfterRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1626,6 +1653,7 @@ public void testRemoveEdgesFromNode( 100); RelatedEntitiesResult relatedEntitiesOfOtherIncomingRelationTypesAfterRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", 
nodeToRemoveFrom.toString()), anyType, @@ -1650,6 +1678,7 @@ public void testRemoveEdgesFromNodeNoRelationshipTypes() throws Exception { // populated graph asserted in testPopulatedGraphService RelatedEntitiesResult relatedOutgoingEntitiesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1661,12 +1690,15 @@ public void testRemoveEdgesFromNodeNoRelationshipTypes() throws Exception { // can be replaced with a single removeEdgesFromNode and undirectedRelationships once supported // by all implementations - service.removeEdgesFromNode(nodeToRemoveFrom, Collections.emptyList(), outgoingRelationships); - service.removeEdgesFromNode(nodeToRemoveFrom, Collections.emptyList(), incomingRelationships); + service.removeEdgesFromNode( + operationContext, nodeToRemoveFrom, Collections.emptyList(), outgoingRelationships); + service.removeEdgesFromNode( + operationContext, nodeToRemoveFrom, Collections.emptyList(), incomingRelationships); syncAfterWrite(); RelatedEntitiesResult relatedOutgoingEntitiesAfterRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1680,13 +1712,20 @@ public void testRemoveEdgesFromNodeNoRelationshipTypes() throws Exception { // does the test actually test something? is the Collections.emptyList() the only reason why we // did not see changes? 
service.removeEdgesFromNode( - nodeToRemoveFrom, Arrays.asList(downstreamOf, hasOwner, knowsUser), outgoingRelationships); + operationContext, + nodeToRemoveFrom, + Arrays.asList(downstreamOf, hasOwner, knowsUser), + outgoingRelationships); service.removeEdgesFromNode( - nodeToRemoveFrom, Arrays.asList(downstreamOf, hasOwner, knowsUser), incomingRelationships); + operationContext, + nodeToRemoveFrom, + Arrays.asList(downstreamOf, hasOwner, knowsUser), + incomingRelationships); syncAfterWrite(); RelatedEntitiesResult relatedOutgoingEntitiesAfterRemoveAll = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1706,6 +1745,7 @@ public void testRemoveEdgesFromUnknownNode() throws Exception { // populated graph asserted in testPopulatedGraphService RelatedEntitiesResult relatedOutgoingEntitiesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1718,13 +1758,20 @@ public void testRemoveEdgesFromUnknownNode() throws Exception { // can be replaced with a single removeEdgesFromNode and undirectedRelationships once supported // by all implementations service.removeEdgesFromNode( - nodeToRemoveFrom, Arrays.asList(downstreamOf, hasOwner, knowsUser), outgoingRelationships); + operationContext, + nodeToRemoveFrom, + Arrays.asList(downstreamOf, hasOwner, knowsUser), + outgoingRelationships); service.removeEdgesFromNode( - nodeToRemoveFrom, Arrays.asList(downstreamOf, hasOwner, knowsUser), incomingRelationships); + operationContext, + nodeToRemoveFrom, + Arrays.asList(downstreamOf, hasOwner, knowsUser), + incomingRelationships); syncAfterWrite(); RelatedEntitiesResult relatedOutgoingEntitiesAfterRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1740,13 +1787,14 @@ public void testRemoveEdgesFromUnknownNode() throws Exception { public void testRemoveNode() throws Exception { GraphService service = getPopulatedGraphService(); - 
service.removeNode(dataset2Urn); + service.removeNode(operationContext, dataset2Urn); syncAfterWrite(); // assert the modified graph // All downstreamOf, hasOwner, knowsUser relationships minus datasetTwo's, outgoing assertEqualsAnyOrder( service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1768,6 +1816,7 @@ public void testRemoveUnknownNode() throws Exception { // populated graph asserted in testPopulatedGraphService RelatedEntitiesResult entitiesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1777,11 +1826,12 @@ public void testRemoveUnknownNode() throws Exception { 0, 100); - service.removeNode(unknownUrn); + service.removeNode(operationContext, unknownUrn); syncAfterWrite(); RelatedEntitiesResult entitiesAfterRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1806,6 +1856,7 @@ public void testClear() throws Exception { // again assertEqualsAnyOrder( service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType), EMPTY_FILTER, anyType, @@ -1817,6 +1868,7 @@ public void testClear() throws Exception { Collections.emptyList()); assertEqualsAnyOrder( service.findRelatedEntities( + operationContext, ImmutableList.of(userType), EMPTY_FILTER, anyType, @@ -1828,6 +1880,7 @@ public void testClear() throws Exception { Collections.emptyList()); assertEqualsAnyOrder( service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, ImmutableList.of(userType), @@ -1891,6 +1944,7 @@ public void testConcurrentAddEdge() throws Exception { RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -1937,6 +1991,7 @@ public void testConcurrentRemoveEdgesFromNode() throws Exception { // assert the graph is there RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -1956,6 +2011,7 @@ public void 
testConcurrentRemoveEdgesFromNode() throws Exception { edge -> () -> service.removeEdgesFromNode( + operationContext, edge.getSource(), Collections.singletonList(edge.getRelationshipType()), outgoingRelationships)); @@ -1965,6 +2021,7 @@ public void testConcurrentRemoveEdgesFromNode() throws Exception { // assert the graph is gone RelatedEntitiesResult relatedEntitiesAfterDeletion = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -1998,6 +2055,7 @@ public void testConcurrentRemoveNodes() throws Exception { // assert the graph is there RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -2013,13 +2071,14 @@ public void testConcurrentRemoveNodes() throws Exception { // remove all nodes concurrently // nodes will be removed multiple times Stream operations = - edges.stream().map(edge -> () -> service.removeNode(edge.getSource())); + edges.stream().map(edge -> () -> service.removeNode(operationContext, edge.getSource())); doTestConcurrentOp(operations); syncAfterWrite(); // assert the graph is gone RelatedEntitiesResult relatedEntitiesAfterDeletion = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -2094,12 +2153,12 @@ public void testPopulatedGraphServiceGetLineageMultihop(Boolean attemptMultiPath (!((service instanceof Neo4jGraphService) || (service instanceof DgraphGraphService))); EntityLineageResult upstreamLineage = - service.getLineage(dataset1Urn, LineageDirection.UPSTREAM, 0, 1000, 2); + service.getLineage(operationContext, dataset1Urn, LineageDirection.UPSTREAM, 0, 1000, 2); assertEquals(upstreamLineage.getTotal().intValue(), 0); assertEquals(upstreamLineage.getRelationships().size(), 0); EntityLineageResult downstreamLineage = - service.getLineage(dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); + service.getLineage(operationContext, dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); 
assertEquals(downstreamLineage.getTotal().intValue(), 5); assertEquals(downstreamLineage.getRelationships().size(), 5); @@ -2124,7 +2183,8 @@ public void testPopulatedGraphServiceGetLineageMultihop(Boolean attemptMultiPath assertTrue(relationships.containsKey(dataJobTwoUrn)); assertEquals(relationships.get(dataJobTwoUrn).getDegree(), 1); - upstreamLineage = service.getLineage(dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 2); + upstreamLineage = + service.getLineage(operationContext, dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 2); assertEquals(upstreamLineage.getTotal().intValue(), 3); assertEquals(upstreamLineage.getRelationships().size(), 3); relationships = @@ -2137,7 +2197,8 @@ public void testPopulatedGraphServiceGetLineageMultihop(Boolean attemptMultiPath assertTrue(relationships.containsKey(dataJobOneUrn)); assertEquals(relationships.get(dataJobOneUrn).getDegree(), 1); - downstreamLineage = service.getLineage(dataset3Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); + downstreamLineage = + service.getLineage(operationContext, dataset3Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); assertEquals(downstreamLineage.getTotal().intValue(), 0); assertEquals(downstreamLineage.getRelationships().size(), 0); } @@ -2156,6 +2217,7 @@ public void testHighlyConnectedGraphWalk() throws Exception { Set expectedRelatedEntities = convertEdgesToRelatedEntities(edges); RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -2169,9 +2231,13 @@ public void testHighlyConnectedGraphWalk() throws Exception { expectedRelatedEntities); Urn root = dataset1Urn; + OperationContext limitedHopOpContext = + operationContext.withLineageFlags(f -> f.setEntitiesExploredPerHopLimit(5)); + EntityLineageResult lineageResult = getGraphService(false) .getLineage( + limitedHopOpContext, root, LineageDirection.UPSTREAM, new GraphFilters( @@ -2183,8 +2249,7 @@ public void testHighlyConnectedGraphWalk() throws Exception { 
.collect(Collectors.toList())), 0, 1000, - 100, - new LineageFlags().setEntitiesExploredPerHopLimit(5)); + 100); // Unable to explore all paths because multi is disabled, but will be at least 5 since it will // explore 5 edges assertTrue( @@ -2201,6 +2266,7 @@ public void testHighlyConnectedGraphWalk() throws Exception { EntityLineageResult lineageResultMulti = getGraphService(true) .getLineage( + limitedHopOpContext, root, LineageDirection.UPSTREAM, new GraphFilters( @@ -2212,8 +2278,7 @@ public void testHighlyConnectedGraphWalk() throws Exception { .collect(Collectors.toList())), 0, 1000, - 100, - new LineageFlags().setEntitiesExploredPerHopLimit(5)); + 100); assertTrue( lineageResultMulti.getRelationships().size() >= 5 diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java index e4cefaa1feaa1..a4a93b29f50c6 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java @@ -1,8 +1,10 @@ package com.linkedin.metadata.graph; import static com.linkedin.metadata.search.utils.QueryUtils.*; +import static org.mockito.Mockito.mock; import static org.testng.Assert.*; +import io.datahubproject.metadata.context.OperationContext; import java.util.Arrays; import java.util.Collections; import org.testng.annotations.DataProvider; @@ -220,6 +222,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allOutgoingRelatedEntities = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -243,6 +246,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allIncomingRelatedEntities = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -269,6 +273,7 @@ public 
void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allUnknownRelationshipTypeRelatedEntities = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -281,6 +286,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult someUnknownRelationshipTypeRelatedEntities = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -306,6 +312,7 @@ public void testPopulatedGraphService() throws Exception { RelatedEntitiesResult relatedOutgoingEntitiesBeforeRemove = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -328,6 +335,7 @@ public void testPopulatedGraphService() throws Exception { downstreamOfSchemaFieldTwo)); RelatedEntitiesResult relatedIncomingEntitiesBeforeRemove = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -360,13 +368,14 @@ public void testPopulatedGraphService() throws Exception { public void testRemoveNode() throws Exception { GraphService service = getPopulatedGraphService(); - service.removeNode(dataset2Urn); + service.removeNode(mock(OperationContext.class), dataset2Urn); syncAfterWrite(); // assert the modified graph // All downstreamOf, hasOwner, knowsUser relationships minus datasetTwo's, outgoing assertEqualsAnyOrder( service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAOTest.java similarity index 92% rename from metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAOTest.java index 0bf7df1fc8e7c..b8e3a6e107128 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAOTest.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.graph.search; +package com.linkedin.metadata.graph.elastic; import com.google.common.collect.ImmutableList; import com.google.common.io.Resources; @@ -9,10 +9,11 @@ import com.linkedin.metadata.Constants; import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.metadata.graph.GraphFilters; -import com.linkedin.metadata.graph.elastic.ESGraphQueryDAO; import com.linkedin.metadata.models.registry.LineageRegistry; import com.linkedin.metadata.query.LineageFlags; import com.linkedin.metadata.query.filter.RelationshipDirection; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.ArrayList; @@ -21,6 +22,7 @@ import java.util.Map; import org.opensearch.index.query.QueryBuilder; import org.testng.Assert; +import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; public class ESGraphQueryDAOTest { @@ -34,8 +36,15 @@ public class ESGraphQueryDAOTest { private static final String TEST_QUERY_FILE_FULL_MULTIPLE_FILTERS = "elasticsearch/sample_filters/lineage_query_filters_full_multiple_filters.json"; + private OperationContext operationContext; + + @BeforeTest + public void init() { + operationContext = TestOperationContexts.systemContextNoSearchAuthorization(); + } + @Test - private static void testGetQueryForLineageFullArguments() throws Exception { + private void testGetQueryForLineageFullArguments() throws Exception { URL urlLimited = Resources.getResource(TEST_QUERY_FILE_LIMITED); String expectedQueryLimited = Resources.toString(urlLimited, StandardCharsets.UTF_8); @@ -108,21 +117,26 @@ private static void testGetQueryForLineageFullArguments() 
throws Exception { QueryBuilder fullBuilder = graphQueryDAO.getLineageQuery( + operationContext.withLineageFlags( + f -> new LineageFlags().setEndTimeMillis(endTime).setStartTimeMillis(startTime)), urnsPerEntityType, edgesPerEntityType, - graphFilters, - new LineageFlags().setEndTimeMillis(endTime).setStartTimeMillis(startTime)); + graphFilters); QueryBuilder fullBuilderEmptyFilters = graphQueryDAO.getLineageQuery( - urnsPerEntityType, edgesPerEntityType, GraphFilters.emptyGraphFilters, null); + operationContext, + urnsPerEntityType, + edgesPerEntityType, + GraphFilters.emptyGraphFilters); QueryBuilder fullBuilderMultipleFilters = graphQueryDAO.getLineageQuery( + operationContext.withLineageFlags( + f -> new LineageFlags().setEndTimeMillis(endTime).setStartTimeMillis(startTime)), urnsPerEntityTypeMultiple, edgesPerEntityTypeMultiple, - graphFiltersMultiple, - new LineageFlags().setEndTimeMillis(endTime).setStartTimeMillis(startTime)); + graphFiltersMultiple); Assert.assertEquals(limitedBuilder.toString(), expectedQueryLimited); Assert.assertEquals(fullBuilder.toString(), expectedQueryFull); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAOTest.java new file mode 100644 index 0000000000000..ac96257e8ec41 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAOTest.java @@ -0,0 +1,33 @@ +package com.linkedin.metadata.graph.elastic; + +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; + +import com.linkedin.metadata.config.search.GraphQueryConfiguration; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import com.linkedin.metadata.utils.elasticsearch.IndexConvention; +import 
com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.script.Script; +import org.testng.annotations.Test; + +public class ESGraphWriteDAOTest { + public static final IndexConvention TEST_INDEX_CONVENTION = IndexConventionImpl.noPrefix("md5"); + + @Test + public void testUpdateByQuery() { + ESBulkProcessor mockBulkProcess = mock(ESBulkProcessor.class); + GraphQueryConfiguration config = new GraphQueryConfiguration(); + config.setGraphStatusEnabled(true); + ESGraphWriteDAO test = new ESGraphWriteDAO(TEST_INDEX_CONVENTION, mockBulkProcess, 0, config); + + test.updateByQuery(new Script("test"), QueryBuilders.boolQuery()); + + verify(mockBulkProcess) + .updateByQuery( + eq(new Script("test")), eq(QueryBuilders.boolQuery()), eq("graph_service_v1")); + verifyNoMoreInteractions(mockBulkProcess); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java new file mode 100644 index 0000000000000..1f53b9c4e999e --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java @@ -0,0 +1,98 @@ +package com.linkedin.metadata.graph.elastic; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.aspect.models.graph.EdgeUrnType; +import com.linkedin.metadata.entity.TestEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.models.registry.LineageRegistry; +import 
com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; +import java.util.Set; +import org.mockito.ArgumentCaptor; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.ExistsQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.script.Script; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +public class ElasticSearchGraphServiceTest { + + private ElasticSearchGraphService test; + private ESBulkProcessor mockESBulkProcessor; + private ESGraphWriteDAO mockWriteDAO; + private ESGraphQueryDAO mockReadDAO; + + @BeforeTest + public void beforeTest() { + EntityRegistry entityRegistry = new TestEntityRegistry(); + mockESBulkProcessor = mock(ESBulkProcessor.class); + mockWriteDAO = mock(ESGraphWriteDAO.class); + mockReadDAO = mock(ESGraphQueryDAO.class); + test = + new ElasticSearchGraphService( + new LineageRegistry(entityRegistry), + mockESBulkProcessor, + IndexConventionImpl.noPrefix("md5"), + mockWriteDAO, + mockReadDAO, + mock(ESIndexBuilder.class), + "md5"); + } + + @BeforeMethod + public void beforeMethod() { + reset(mockESBulkProcessor, mockWriteDAO, mockReadDAO); + } + + @Test + public void testSetEdgeStatus() { + final Urn testUrn = UrnUtils.getUrn("urn:li:container:test"); + for (boolean removed : Set.of(true, false)) { + test.setEdgeStatus(testUrn, removed, EdgeUrnType.values()); + + ArgumentCaptor