diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 89ca7400c7ccf..587833a41ba16 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -989,7 +989,7 @@ jobs: DATAHUB_VERSION: ${{ needs.setup.outputs.unique_tag }} DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_INGESTION_IMAGE }} ACTIONS_VERSION: ${{ needs.datahub_ingestion_slim_build.outputs.tag || 'head-slim' }} - ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor]==0.0.13 acryl-datahub-actions==0.0.13 acryl-datahub==0.10.5" + ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor] acryl-datahub-actions" ACTIONS_CONFIG: "https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml" run: | ./smoke-test/run-quickstart.sh diff --git a/build.gradle b/build.gradle index fbced335ddc2e..07ca1f09e813c 100644 --- a/build.gradle +++ b/build.gradle @@ -52,7 +52,7 @@ buildscript { ext.hadoop3Version = '3.3.6' ext.kafkaVersion = '5.5.15' ext.hazelcastVersion = '5.3.6' - ext.ebeanVersion = '12.16.1' + ext.ebeanVersion = '15.5.2' ext.googleJavaFormatVersion = '1.18.1' ext.openLineageVersion = '1.19.0' ext.logbackClassicJava8 = '1.2.12' @@ -104,8 +104,8 @@ project.ext.spec = [ project.ext.externalDependency = [ 'akkaHttp': 'com.typesafe.akka:akka-http-core_2.12:10.2.10', - 'antlr4Runtime': 'org.antlr:antlr4-runtime:4.7.2', - 'antlr4': 'org.antlr:antlr4:4.7.2', + 'antlr4Runtime': 'org.antlr:antlr4-runtime:4.9.3', + 'antlr4': 'org.antlr:antlr4:4.9.3', 'assertJ': 'org.assertj:assertj-core:3.11.1', 'avro': 'org.apache.avro:avro:1.11.3', 'avroCompiler': 'org.apache.avro:avro-compiler:1.11.3', @@ -129,8 +129,10 @@ project.ext.externalDependency = [ 'dropwizardMetricsCore': 'io.dropwizard.metrics:metrics-core:4.2.3', 'dropwizardMetricsJmx': 'io.dropwizard.metrics:metrics-jmx:4.2.3', 'ebean': 'io.ebean:ebean:' + ebeanVersion, + 'ebeanTest': 'io.ebean:ebean-test:' + ebeanVersion, 'ebeanAgent': 'io.ebean:ebean-agent:' + ebeanVersion, 'ebeanDdl': 'io.ebean:ebean-ddl-generator:' + ebeanVersion, + 'ebeanQueryBean': 'io.ebean:querybean-generator:' + ebeanVersion, 'elasticSearchRest': 'org.opensearch.client:opensearch-rest-high-level-client:' + elasticsearchVersion, 'elasticSearchJava': 'org.opensearch.client:opensearch-java:2.6.0', 'findbugsAnnotations': 'com.google.code.findbugs:annotations:3.0.1', @@ -359,6 +361,9 @@ configure(subprojects.findAll {! it.name.startsWith('spark-lineage')}) { exclude group: "org.slf4j", module: "slf4j-log4j12" exclude group: "org.slf4j", module: "slf4j-nop" exclude group: "org.slf4j", module: "slf4j-ext" + + resolutionStrategy.force externalDependency.antlr4Runtime + resolutionStrategy.force externalDependency.antlr4 } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java index d34fd9f32d322..5f873b4bebab3 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java @@ -6,7 +6,6 @@ import com.datahub.authentication.Authentication; import com.fasterxml.jackson.core.StreamReadConstraints; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.QueryContext; @@ -14,14 +13,12 @@ import com.linkedin.datahub.graphql.generated.AndFilterInput; import com.linkedin.datahub.graphql.generated.FacetFilterInput; import com.linkedin.datahub.graphql.resolvers.search.SearchUtils; -import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; -import com.linkedin.metadata.search.utils.ESUtils; import com.linkedin.metadata.service.ViewService; import com.linkedin.view.DataHubViewInfo; import graphql.schema.DataFetchingEnvironment; @@ -39,8 +36,6 @@ public class ResolverUtils { - private static final Set KEYWORD_EXCLUDED_FILTERS = - ImmutableSet.of("runId", "_entityType"); private static final ObjectMapper MAPPER = new ObjectMapper(); static { @@ -111,11 +106,10 @@ public static Map buildFacetFilters( return facetFilters; } - public static List criterionListFromAndFilter( - List andFilters, @Nullable AspectRetriever aspectRetriever) { + public static List criterionListFromAndFilter(List andFilters) { return andFilters != null && !andFilters.isEmpty() ? andFilters.stream() - .map(filter -> criterionFromFilter(filter, aspectRetriever)) + .map(filter -> criterionFromFilter(filter)) .collect(Collectors.toList()) : Collections.emptyList(); } @@ -124,14 +118,13 @@ public static List criterionListFromAndFilter( // conjunctive criterion // arrays, rather than just one for the AND case. public static ConjunctiveCriterionArray buildConjunctiveCriterionArrayWithOr( - @Nonnull List orFilters, @Nullable AspectRetriever aspectRetriever) { + @Nonnull List orFilters) { return new ConjunctiveCriterionArray( orFilters.stream() .map( orFilter -> { CriterionArray andCriterionForOr = - new CriterionArray( - criterionListFromAndFilter(orFilter.getAnd(), aspectRetriever)); + new CriterionArray(criterionListFromAndFilter(orFilter.getAnd())); return new ConjunctiveCriterion().setAnd(andCriterionForOr); }) .collect(Collectors.toList())); @@ -139,9 +132,7 @@ public static ConjunctiveCriterionArray buildConjunctiveCriterionArrayWithOr( @Nullable public static Filter buildFilter( - @Nullable List andFilters, - @Nullable List orFilters, - @Nullable AspectRetriever aspectRetriever) { + @Nullable List andFilters, @Nullable List orFilters) { if ((andFilters == null || andFilters.isEmpty()) && (orFilters == null || orFilters.isEmpty())) { return null; @@ -150,34 +141,21 @@ public static Filter buildFilter( // Or filters are the new default. We will check them first. // If we have OR filters, we need to build a series of CriterionArrays if (orFilters != null && !orFilters.isEmpty()) { - return new Filter().setOr(buildConjunctiveCriterionArrayWithOr(orFilters, aspectRetriever)); + return new Filter().setOr(buildConjunctiveCriterionArrayWithOr(orFilters)); } // If or filters are not set, someone may be using the legacy and filters - final List andCriterions = criterionListFromAndFilter(andFilters, aspectRetriever); + final List andCriterions = criterionListFromAndFilter(andFilters); return new Filter() .setOr( new ConjunctiveCriterionArray( new ConjunctiveCriterion().setAnd(new CriterionArray(andCriterions)))); } - public static Criterion criterionFromFilter( - final FacetFilterInput filter, @Nullable AspectRetriever aspectRetriever) { - return criterionFromFilter(filter, false, aspectRetriever); - } - // Translates a FacetFilterInput (graphql input class) into Criterion (our internal model) - public static Criterion criterionFromFilter( - final FacetFilterInput filter, - final Boolean skipKeywordSuffix, - @Nullable AspectRetriever aspectRetriever) { + public static Criterion criterionFromFilter(final FacetFilterInput filter) { Criterion result = new Criterion(); - - if (skipKeywordSuffix) { - result.setField(filter.getField()); - } else { - result.setField(getFilterField(filter.getField(), skipKeywordSuffix, aspectRetriever)); - } + result.setField(filter.getField()); // `value` is deprecated in place of `values`- this is to support old query patterns. If values // is provided, @@ -210,16 +188,6 @@ public static Criterion criterionFromFilter( return result; } - private static String getFilterField( - final String originalField, - final boolean skipKeywordSuffix, - @Nullable AspectRetriever aspectRetriever) { - if (KEYWORD_EXCLUDED_FILTERS.contains(originalField)) { - return originalField; - } - return ESUtils.toKeywordField(originalField, skipKeywordSuffix, aspectRetriever); - } - public static Filter viewFilter( OperationContext opContext, ViewService viewService, String viewUrn) { if (viewUrn == null) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java index 0e9d2cea61141..f7e86f1f2f345 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java @@ -12,6 +12,7 @@ import com.linkedin.datahub.graphql.generated.AssertionRunStatus; import com.linkedin.datahub.graphql.generated.FacetFilterInput; import com.linkedin.datahub.graphql.generated.FilterInput; +import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.datahub.graphql.types.dataset.mappers.AssertionRunEventMapper; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; @@ -147,7 +148,7 @@ public static Filter buildFilter( .setAnd( new CriterionArray( facetFilters.stream() - .map(filter -> criterionFromFilter(filter, true, aspectRetriever)) + .map(ResolverUtils::criterionFromFilter) .collect(Collectors.toList()))))); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java index e0ecebbbc7bc2..29d44b526692f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java @@ -71,10 +71,7 @@ public CompletableFuture get(DataFetchingEnvironment envi .withSearchFlags(flags -> flags.setFulltext(true)), Constants.ACCESS_TOKEN_ENTITY_NAME, "", - buildFilter( - filters, - Collections.emptyList(), - context.getOperationContext().getAspectRetriever()), + buildFilter(filters, Collections.emptyList()), sortCriteria, start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java index b54ca398aef98..18ee5f595ce58 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java @@ -74,9 +74,7 @@ public CompletableFuture get(DataFetchingEnvironment environmen ? BROWSE_PATH_V2_DELIMITER + String.join(BROWSE_PATH_V2_DELIMITER, input.getPath()) : ""; - final Filter inputFilter = - ResolverUtils.buildFilter( - null, input.getOrFilters(), context.getOperationContext().getAspectRetriever()); + final Filter inputFilter = ResolverUtils.buildFilter(null, input.getOrFilters()); BrowseResultV2 browseResults = _entityClient.browseV2( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java index 5c43fafb678c5..e59f7b3116acd 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java @@ -151,11 +151,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) filters.removeIf(f -> f.getField().equals(OUTPUT_PORTS_FILTER_FIELD)); } // add urns from the aspect to our filters - final Filter baseFilter = - ResolverUtils.buildFilter( - filters, - input.getOrFilters(), - context.getOperationContext().getAspectRetriever()); + final Filter baseFilter = ResolverUtils.buildFilter(filters, input.getOrFilters()); final Filter finalFilter = buildFilterWithUrns( context.getDataHubAppConfig(), new HashSet<>(urnsToFilterOn), baseFilter); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java index 6a880503802cb..c6265380fb2fd 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java @@ -79,9 +79,7 @@ public CompletableFuture get(final DataFetchingEnvironment enviro .getFilters() .forEach( filter -> { - criteria.add( - criterionFromFilter( - filter, true, context.getOperationContext().getAspectRetriever())); + criteria.add(criterionFromFilter(filter)); }); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/CreateDynamicFormAssignmentResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/CreateDynamicFormAssignmentResolver.java index 3cf4d9175d45b..b9d74f8af660e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/CreateDynamicFormAssignmentResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/CreateDynamicFormAssignmentResolver.java @@ -33,9 +33,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final CreateDynamicFormAssignmentInput input = bindArgument(environment.getArgument("input"), CreateDynamicFormAssignmentInput.class); final Urn formUrn = UrnUtils.getUrn(input.getFormUrn()); - final DynamicFormAssignment formAssignment = - FormUtils.mapDynamicFormAssignment( - input, context.getOperationContext().getAspectRetriever()); + final DynamicFormAssignment formAssignment = FormUtils.mapDynamicFormAssignment(input); return GraphQLConcurrencyUtils.supplyAsync( () -> { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourcesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourcesResolver.java index 1a2806224e4a9..8ead47aa65ceb 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourcesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourcesResolver.java @@ -68,10 +68,7 @@ public CompletableFuture get( .withSearchFlags(flags -> flags.setFulltext(true)), Constants.INGESTION_SOURCE_ENTITY_NAME, query, - buildFilter( - filters, - Collections.emptyList(), - context.getOperationContext().getAspectRetriever()), + buildFilter(filters, Collections.emptyList()), null, start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java index 4d4b898618bf9..0f4f0b2645e42 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java @@ -9,9 +9,9 @@ import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.FilterInput; import com.linkedin.datahub.graphql.generated.TimeSeriesAspect; +import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; -import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; @@ -120,7 +120,7 @@ public CompletableFuture> get(DataFetchingEnvironment env maybeStartTimeMillis, maybeEndTimeMillis, maybeLimit, - buildFilters(maybeFilters, context.getOperationContext().getAspectRetriever()), + buildFilters(maybeFilters), maybeSort); // Step 2: Bind profiles into GraphQL strong types. @@ -135,8 +135,7 @@ public CompletableFuture> get(DataFetchingEnvironment env "get"); } - private Filter buildFilters( - @Nullable FilterInput maybeFilters, @Nullable AspectRetriever aspectRetriever) { + private Filter buildFilters(@Nullable FilterInput maybeFilters) { if (maybeFilters == null) { return null; } @@ -147,7 +146,7 @@ private Filter buildFilters( .setAnd( new CriterionArray( maybeFilters.getAnd().stream() - .map(filter -> criterionFromFilter(filter, true, aspectRetriever)) + .map(ResolverUtils::criterionFromFilter) .collect(Collectors.toList()))))); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java index d118c04d19393..cac0cca2682e8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java @@ -18,7 +18,6 @@ import com.linkedin.form.FormPromptType; import com.linkedin.form.FormType; import com.linkedin.form.StructuredPropertyParams; -import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -31,7 +30,6 @@ import java.util.UUID; import java.util.stream.Collectors; import javax.annotation.Nonnull; -import javax.annotation.Nullable; public class FormUtils { @@ -61,16 +59,13 @@ public static PrimitivePropertyValueArray getStructuredPropertyValuesFromInput( /** Map a GraphQL CreateDynamicFormAssignmentInput to the GMS DynamicFormAssignment aspect */ @Nonnull public static DynamicFormAssignment mapDynamicFormAssignment( - @Nonnull final CreateDynamicFormAssignmentInput input, - @Nullable AspectRetriever aspectRetriever) { + @Nonnull final CreateDynamicFormAssignmentInput input) { Objects.requireNonNull(input, "input must not be null"); final DynamicFormAssignment result = new DynamicFormAssignment(); final Filter filter = new Filter() - .setOr( - ResolverUtils.buildConjunctiveCriterionArrayWithOr( - input.getOrFilters(), aspectRetriever)); + .setOr(ResolverUtils.buildConjunctiveCriterionArrayWithOr(input.getOrFilters())); result.setFilter(filter); return result; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java index da0d5dd07a94f..05fc540e39de4 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java @@ -63,10 +63,7 @@ public CompletableFuture get(DataFetchingEnvironment e context.getOperationContext().withSearchFlags(flags -> flags.setFulltext(true)), Constants.OWNERSHIP_TYPE_ENTITY_NAME, query, - buildFilter( - filters, - Collections.emptyList(), - context.getOperationContext().getAspectRetriever()), + buildFilter(filters, Collections.emptyList()), Collections.singletonList(DEFAULT_SORT_CRITERION), start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java index ce11451aa1913..4120401e0150f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java @@ -59,11 +59,7 @@ public CompletableFuture get(final DataFetchingEnvironment e log.debug( "User {} listing policies with filters {}", context.getActorUrn(), filters.toString()); - final Filter filter = - ResolverUtils.buildFilter( - facetFilters, - Collections.emptyList(), - context.getOperationContext().getAspectRetriever()); + final Filter filter = ResolverUtils.buildFilter(facetFilters, Collections.emptyList()); return _policyFetcher .fetchPolicies(context.getOperationContext(), start, query, count, filter) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java index aa411f019a4c0..3c84884bedbae 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java @@ -15,7 +15,6 @@ import com.linkedin.datahub.graphql.generated.ListQueriesResult; import com.linkedin.datahub.graphql.generated.QueryEntity; import com.linkedin.entity.client.EntityClient; -import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; @@ -74,7 +73,7 @@ public CompletableFuture get(final DataFetchingEnvironment en flags -> flags.setFulltext(true).setSkipHighlighting(true)), QUERY_ENTITY_NAME, query, - buildFilters(input, context.getOperationContext().getAspectRetriever()), + buildFilters(input), sortCriteria, start, count); @@ -111,8 +110,7 @@ private List mapUnresolvedQueries(final List queryUrns) { } @Nullable - private Filter buildFilters( - @Nonnull final ListQueriesInput input, @Nullable AspectRetriever aspectRetriever) { + private Filter buildFilters(@Nonnull final ListQueriesInput input) { final AndFilterInput criteria = new AndFilterInput(); List andConditions = new ArrayList<>(); @@ -139,6 +137,6 @@ private Filter buildFilters( } criteria.setAnd(andConditions); - return buildFilter(Collections.emptyList(), ImmutableList.of(criteria), aspectRetriever); + return buildFilter(Collections.emptyList(), ImmutableList.of(criteria)); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java index 0181877864390..28334b2c0af9a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java @@ -17,6 +17,7 @@ import com.linkedin.datahub.graphql.generated.RecommendationRenderType; import com.linkedin.datahub.graphql.generated.RecommendationRequestContext; import com.linkedin.datahub.graphql.generated.SearchParams; +import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.datahub.graphql.types.common.mappers.UrnToEntityMapper; import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; import com.linkedin.metadata.query.filter.CriterionArray; @@ -105,9 +106,7 @@ private com.linkedin.metadata.recommendation.RecommendationRequestContext mapReq searchRequestContext.setFilters( new CriterionArray( requestContext.getSearchRequestContext().getFilters().stream() - .map( - facetField -> - criterionFromFilter(facetField, opContext.getAspectRetriever())) + .map(ResolverUtils::criterionFromFilter) .collect(Collectors.toList()))); } mappedRequestContext.setSearchRequestContext(searchRequestContext); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java index 19bccaf265086..29b71d95ad974 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java @@ -64,9 +64,7 @@ public CompletableFuture get(DataFetchingEnvironment environme UrnUtils.getUrn(input.getViewUrn())) : null; - final Filter inputFilter = - ResolverUtils.buildFilter( - null, input.getOrFilters(), context.getOperationContext().getAspectRetriever()); + final Filter inputFilter = ResolverUtils.buildFilter(null, input.getOrFilters()); final SearchFlags searchFlags = mapInputFlags(context, input.getSearchFlags()); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteResolver.java index 79792940ef27f..5e4f56dbfff98 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteResolver.java @@ -51,11 +51,7 @@ public CompletableFuture get(DataFetchingEnvironment enviro throw new ValidationException("'query' parameter can not be null or empty"); } - final Filter filter = - ResolverUtils.buildFilter( - input.getFilters(), - input.getOrFilters(), - context.getOperationContext().getRetrieverContext().orElseThrow().getAspectRetriever()); + final Filter filter = ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()); final int limit = input.getLimit() != null ? input.getLimit() : DEFAULT_LIMIT; return GraphQLConcurrencyUtils.supplyAsync( () -> { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutocompleteUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutocompleteUtils.java index 5b5888b89b241..c28b18acf195b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutocompleteUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutocompleteUtils.java @@ -43,10 +43,7 @@ public static CompletableFuture batchGetAutocomplet GraphQLConcurrencyUtils.supplyAsync( () -> { final Filter filter = - ResolverUtils.buildFilter( - input.getFilters(), - input.getOrFilters(), - context.getOperationContext().getAspectRetriever()); + ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()); final Filter finalFilter = view != null ? SearchUtils.combineFilters( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java index 8b8b93353bc6e..77eef1b9a25c6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java @@ -72,9 +72,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) UrnUtils.getUrn(input.getViewUrn())) : null; - final Filter baseFilter = - ResolverUtils.buildFilter( - null, input.getOrFilters(), context.getOperationContext().getAspectRetriever()); + final Filter baseFilter = ResolverUtils.buildFilter(null, input.getOrFilters()); final SearchFlags searchFlags; com.linkedin.datahub.graphql.generated.SearchFlags inputFlags = input.getSearchFlags(); if (inputFlags != null) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java index 1b719b6f78620..2c058eb60a7ee 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java @@ -130,10 +130,7 @@ public CompletableFuture get(DataFetchingEnvironment entityNames, sanitizedQuery, maxHops, - ResolverUtils.buildFilter( - facetFilters, - input.getOrFilters(), - context.getOperationContext().getAspectRetriever()), + ResolverUtils.buildFilter(facetFilters, input.getOrFilters()), null, scrollId, keepAlive, diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java index 0dbed92b7d58e..d103704146d39 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java @@ -61,10 +61,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) : null; final Filter baseFilter = - ResolverUtils.buildFilter( - input.getFilters(), - input.getOrFilters(), - context.getOperationContext().getAspectRetriever()); + ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()); SearchFlags searchFlags = mapInputFlags(context, input.getSearchFlags()); List sortCriteria; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java index dc3a1fc17e4ec..c90be924ac69f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java @@ -139,10 +139,7 @@ public CompletableFuture get(DataFetchingEnvironment count); final Filter filter = - ResolverUtils.buildFilter( - input.getFilters(), - input.getOrFilters(), - context.getOperationContext().getAspectRetriever()); + ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()); final SearchFlags searchFlags; com.linkedin.datahub.graphql.generated.SearchFlags inputFlags = input.getSearchFlags(); if (inputFlags != null) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java index 7a48e305dbfe4..45751fc6eb8cb 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java @@ -86,10 +86,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) context.getOperationContext().withSearchFlags(flags -> searchFlags), entityName, sanitizedQuery, - ResolverUtils.buildFilter( - input.getFilters(), - input.getOrFilters(), - context.getOperationContext().getAspectRetriever()), + ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()), Collections.emptyList(), start, count)); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java index 265f4d5f5d56e..bda950ef3eb58 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java @@ -16,7 +16,6 @@ import com.linkedin.datahub.graphql.generated.ListViewsResult; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; -import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; @@ -31,7 +30,6 @@ import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; import javax.annotation.Nonnull; -import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; /** Resolver used for listing global DataHub Views. */ @@ -73,7 +71,7 @@ public CompletableFuture get(final DataFetchingEnvironment envi context.getOperationContext().withSearchFlags(flags -> flags.setFulltext(true)), Constants.DATAHUB_VIEW_ENTITY_NAME, query, - buildFilters(context.getOperationContext().getAspectRetriever()), + buildFilters(), Collections.singletonList(DEFAULT_SORT_CRITERION), start, count); @@ -109,7 +107,7 @@ private List mapUnresolvedViews(final List entityUrns) { return results; } - private Filter buildFilters(@Nullable AspectRetriever aspectRetriever) { + private Filter buildFilters() { final AndFilterInput globalCriteria = new AndFilterInput(); List andConditions = new ArrayList<>(); andConditions.add( @@ -120,6 +118,6 @@ private Filter buildFilters(@Nullable AspectRetriever aspectRetriever) { false, FilterOperator.EQUAL)); globalCriteria.setAnd(andConditions); - return buildFilter(Collections.emptyList(), ImmutableList.of(globalCriteria), aspectRetriever); + return buildFilter(Collections.emptyList(), ImmutableList.of(globalCriteria)); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java index abfdeb2d60869..53a7c92bbe8ec 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java @@ -132,6 +132,6 @@ private Filter buildFilters( filterCriteria.setAnd(andConditions); // Currently, there is no way to fetch the views belonging to another user. - return buildFilter(Collections.emptyList(), ImmutableList.of(filterCriteria), aspectRetriever); + return buildFilter(Collections.emptyList(), ImmutableList.of(filterCriteria)); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtils.java index 70a5ced4bfbf1..aa41a1e487651 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtils.java @@ -141,7 +141,7 @@ private static Filter buildAndFilter( .setAnd( new CriterionArray( input.stream() - .map(f -> ResolverUtils.criterionFromFilter(f, aspectRetriever)) + .map(ResolverUtils::criterionFromFilter) .collect(Collectors.toList())))))); return result; } @@ -157,9 +157,7 @@ private static Filter buildOrFilter( new ConjunctiveCriterion() .setAnd( new CriterionArray( - ImmutableList.of( - ResolverUtils.criterionFromFilter( - filter, aspectRetriever))))) + ImmutableList.of(ResolverUtils.criterionFromFilter(filter))))) .collect(Collectors.toList()))); return result; } diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 9c4375e70d9ba..32d73845e1ae4 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -551,6 +551,16 @@ enum FilterOperator { """ LESS_THAN_OR_EQUAL_TO + """ + Represent the relation: String field starts with value, e.g. name starts with PageView + """ + START_WITH + + """ + Represent the relation: String field ends with value, e.g. name ends with Event + """ + END_WITH + """ Represent the relation: URN field any nested children in addition to the given URN """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java index b01aacaaab65c..2950b50bec8f1 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java @@ -13,7 +13,6 @@ import com.linkedin.datahub.graphql.TestUtils; import com.linkedin.datahub.graphql.generated.FacetFilterInput; import com.linkedin.datahub.graphql.generated.FilterOperator; -import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.DataHubAppConfiguration; import com.linkedin.metadata.config.MetadataChangeProposalConfig; import com.linkedin.metadata.query.filter.Condition; @@ -45,8 +44,7 @@ public void testCriterionFromFilter() throws Exception { null, ImmutableList.of("urn:li:tag:abc", "urn:li:tag:def"), false, - FilterOperator.EQUAL), - mock(AspectRetriever.class)); + FilterOperator.EQUAL)); assertEquals( valuesCriterion, new Criterion() @@ -54,13 +52,12 @@ public void testCriterionFromFilter() throws Exception { .setValues(new StringArray(ImmutableList.of("urn:li:tag:abc", "urn:li:tag:def"))) .setNegated(false) .setCondition(Condition.EQUAL) - .setField("tags.keyword")); + .setField("tags")); // this is the legacy pathway Criterion valueCriterion = criterionFromFilter( - new FacetFilterInput("tags", "urn:li:tag:abc", null, true, FilterOperator.EQUAL), - mock(AspectRetriever.class)); + new FacetFilterInput("tags", "urn:li:tag:abc", null, true, FilterOperator.EQUAL)); assertEquals( valueCriterion, new Criterion() @@ -68,14 +65,12 @@ public void testCriterionFromFilter() throws Exception { .setValues(new StringArray(ImmutableList.of("urn:li:tag:abc"))) .setNegated(true) .setCondition(Condition.EQUAL) - .setField("tags.keyword")); + .setField("tags")); // check that both being null doesn't cause a NPE. this should never happen except via API // interaction Criterion doubleNullCriterion = - criterionFromFilter( - new FacetFilterInput("tags", null, null, true, FilterOperator.EQUAL), - mock(AspectRetriever.class)); + criterionFromFilter(new FacetFilterInput("tags", null, null, true, FilterOperator.EQUAL)); assertEquals( doubleNullCriterion, new Criterion() @@ -83,7 +78,7 @@ public void testCriterionFromFilter() throws Exception { .setValues(new StringArray(ImmutableList.of())) .setNegated(true) .setCondition(Condition.EQUAL) - .setField("tags.keyword")); + .setField("tags")); } @Test diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java index 020f74475ea60..30962aaef5412 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java @@ -46,7 +46,7 @@ public void testGetSuccess() throws Exception { any(), Mockito.eq(Constants.ACCESS_TOKEN_ENTITY_NAME), Mockito.eq(""), - Mockito.eq(buildFilter(filters, Collections.emptyList(), null)), + Mockito.eq(buildFilter(filters, Collections.emptyList())), Mockito.any(List.class), Mockito.eq(input.getStart()), Mockito.eq(input.getCount()))) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java index 9cf7e62e65e25..4897d0819b59f 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java @@ -2,7 +2,6 @@ import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.mock; import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; @@ -18,7 +17,6 @@ import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.datahub.graphql.resolvers.chart.BrowseV2Resolver; import com.linkedin.entity.client.EntityClient; -import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.browse.BrowseResultGroupV2; import com.linkedin.metadata.browse.BrowseResultGroupV2Array; import com.linkedin.metadata.browse.BrowseResultMetadata; @@ -102,7 +100,7 @@ public static void testBrowseV2SuccessWithQueryAndFilter() throws Exception { facetFilterInput.setValues(ImmutableList.of("urn:li:corpuser:test")); andFilterInput.setAnd(ImmutableList.of(facetFilterInput)); orFilters.add(andFilterInput); - Filter filter = ResolverUtils.buildFilter(null, orFilters, mock(AspectRetriever.class)); + Filter filter = ResolverUtils.buildFilter(null, orFilters); EntityClient mockClient = initMockEntityClient( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java index ee728b17e8c62..414c62b693b69 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java @@ -170,6 +170,6 @@ private Filter buildFilter(@Nullable QuerySource source, @Nullable String entity FilterOperator.EQUAL)); } criteria.setAnd(andConditions); - return ResolverUtils.buildFilter(Collections.emptyList(), ImmutableList.of(criteria), null); + return ResolverUtils.buildFilter(Collections.emptyList(), ImmutableList.of(criteria)); } } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java index d32eb9fcf120c..129866bb0fa07 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java @@ -107,7 +107,7 @@ public static void testApplyViewBaseFilter() throws Exception { FormService mockFormService = Mockito.mock(FormService.class); ViewService mockService = initMockViewService(TEST_VIEW_URN, info); - Filter baseFilter = createFilter("baseField.keyword", "baseTest"); + Filter baseFilter = createFilter("baseField", "baseTest"); EntityClient mockClient = initMockEntityClient( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java index 30d6f2dc6f283..86508f1fd2742 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java @@ -164,7 +164,7 @@ public static void testApplyViewBaseFilter() throws Exception { new CriterionArray( ImmutableList.of( new Criterion() - .setField("baseField.keyword") + .setField("baseField") .setValue("baseTest") .setCondition(Condition.EQUAL) .setNegated(false) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolverTest.java index 502188d4977a5..c009cf37c5397 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolverTest.java @@ -110,7 +110,7 @@ public void testGetSuccess() throws Exception { ImmutableList.of( new Criterion() .setCondition(Condition.EQUAL) - .setField("test1.keyword") + .setField("test1") .setValue( "value1") // Unfortunate --- For // backwards compat. @@ -121,7 +121,7 @@ public void testGetSuccess() throws Exception { .setNegated(false), new Criterion() .setCondition(Condition.IN) - .setField("test2.keyword") + .setField("test2") .setValue( "value1") // Unfortunate --- For // backwards compat. diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolverTest.java index a3b9e25e99225..b5c0531db792b 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolverTest.java @@ -56,7 +56,7 @@ public void testGetSuccessInput() throws Exception { new CriterionArray( ImmutableList.of( new Criterion() - .setField("type.keyword") + .setField("type") .setValue(DataHubViewType.GLOBAL.toString()) .setValues( new StringArray( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolverTest.java index 99b0e76976748..85d24f9251eaa 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolverTest.java @@ -58,7 +58,7 @@ public void testGetSuccessInput1() throws Exception { new CriterionArray( ImmutableList.of( new Criterion() - .setField("createdBy.keyword") + .setField("createdBy") .setValue(TEST_USER.toString()) .setValues( new StringArray( @@ -67,7 +67,7 @@ public void testGetSuccessInput1() throws Exception { .setCondition(Condition.EQUAL) .setNegated(false), new Criterion() - .setField("type.keyword") + .setField("type") .setValue(DataHubViewType.GLOBAL.toString()) .setValues( new StringArray( @@ -124,7 +124,7 @@ public void testGetSuccessInput2() throws Exception { new CriterionArray( ImmutableList.of( new Criterion() - .setField("createdBy.keyword") + .setField("createdBy") .setValue(TEST_USER.toString()) .setValues( new StringArray( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolverTest.java index fe32bec01fd7e..86a502b40b936 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolverTest.java @@ -111,7 +111,7 @@ public void testGetSuccessGlobalViewIsCreator() throws Exception { ImmutableList.of( new Criterion() .setCondition(Condition.EQUAL) - .setField("test1.keyword") + .setField("test1") .setValue( "value1") // Unfortunate --- For // backwards compat. @@ -122,7 +122,7 @@ public void testGetSuccessGlobalViewIsCreator() throws Exception { .setNegated(false), new Criterion() .setCondition(Condition.IN) - .setField("test2.keyword") + .setField("test2") .setValue( "value1") // Unfortunate --- For // backwards compat. @@ -175,7 +175,7 @@ public void testGetSuccessGlobalViewManageGlobalViews() throws Exception { ImmutableList.of( new Criterion() .setCondition(Condition.EQUAL) - .setField("test1.keyword") + .setField("test1") .setValue( "value1") // Unfortunate --- For // backwards compat. @@ -186,7 +186,7 @@ public void testGetSuccessGlobalViewManageGlobalViews() throws Exception { .setNegated(false), new Criterion() .setCondition(Condition.IN) - .setField("test2.keyword") + .setField("test2") .setValue( "value1") // Unfortunate --- For // backwards compat. diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java index 701ddd84c173e..d142be1321a5c 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java @@ -154,8 +154,7 @@ public void testMapDefinition() throws Exception { new StringArray( ImmutableList.of("value1", "value2"))) .setValue("value1") // Disgraceful - .setField( - "test1.keyword") // Consider whether we + .setField("test1") // Consider whether we // should NOT go through // the keyword mapping. .setCondition(Condition.IN), @@ -165,8 +164,7 @@ public void testMapDefinition() throws Exception { new StringArray( ImmutableList.of("value3", "value4"))) .setValue("value3") // Disgraceful - .setField( - "test2.keyword") // Consider whether we + .setField("test2") // Consider whether we // should NOT go through // the keyword mapping. .setCondition(Condition.CONTAIN)))))))); diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/CreateAspectTableStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/CreateAspectTableStep.java index 4855cef95cb6e..63b319d943a80 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/CreateAspectTableStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/CreateAspectTableStep.java @@ -77,7 +77,7 @@ public Function executable() { } try { - _server.execute(_server.createSqlUpdate(sqlUpdateStr)); + _server.execute(_server.sqlUpdate(sqlUpdateStr)); } catch (Exception e) { context.report().addLine("Failed to create table metadata_aspect_v2", e); return new DefaultUpgradeStepResult(id(), DataHubUpgradeState.FAILED); diff --git a/docker/datahub-ingestion-base/Dockerfile b/docker/datahub-ingestion-base/Dockerfile index 92b1762099882..08cf2efdcb6a1 100644 --- a/docker/datahub-ingestion-base/Dockerfile +++ b/docker/datahub-ingestion-base/Dockerfile @@ -43,7 +43,9 @@ RUN apt-get update && apt-get upgrade -y \ krb5-user \ krb5-config \ libkrb5-dev \ + librdkafka-dev \ wget \ + curl \ zip \ unzip \ ldap-utils \ diff --git a/docker/kafka-setup/kafka-config.sh b/docker/kafka-setup/kafka-config.sh index 4d5698ccc3856..cc714fa53b84d 100644 --- a/docker/kafka-setup/kafka-config.sh +++ b/docker/kafka-setup/kafka-config.sh @@ -8,7 +8,8 @@ : ${DATAHUB_ANALYTICS_ENABLED:=true} -export KAFKA_HEAP_OPTS="-Xmx64M" +: ${KAFKA_HEAP_OPTS:=-Xmx64M} +export KAFKA_HEAP_OPTS CONNECTION_PROPERTIES_PATH=/tmp/connection.properties diff --git a/docs/how/restore-indices.md b/docs/how/restore-indices.md index 4b7fd77314ef7..368b385ae5ea5 100644 --- a/docs/how/restore-indices.md +++ b/docs/how/restore-indices.md @@ -7,24 +7,39 @@ When a new version of the aspect gets ingested, GMS initiates an MAE event for t the search and graph indices. As such, we can fetch the latest version of each aspect in the local database and produce MAE events corresponding to the aspects to restore the search and graph indices. +By default, restoring the indices from the local database will not remove any existing documents in +the search and graph indices that no longer exist in the local database, potentially leading to inconsistencies +between the search and graph indices and the local database. + ## Quickstart -If you're using the quickstart images, you can use the `datahub` cli to restore indices. +If you're using the quickstart images, you can use the `datahub` cli to restore the indices. -``` +```shell datahub docker quickstart --restore-indices ``` -See [this section](../quickstart.md#restoring-only-the-index-use-with-care) for more information. + +:::info +Using the `datahub` CLI to restore the indices when using the quickstart images will also clear the search and graph indices before restoring. + +See [this section](../quickstart.md#restore-datahub) for more information. ## Docker-compose -If you are on a custom docker-compose deployment, run the following command (you need to checkout [the source repository](https://github.com/datahub-project/datahub)) from the root of the repo to send MAE for each aspect in the Local DB. +If you are on a custom docker-compose deployment, run the following command (you need to checkout [the source repository](https://github.com/datahub-project/datahub)) from the root of the repo to send MAE for each aspect in the local database. -``` +```shell ./docker/datahub-upgrade/datahub-upgrade.sh -u RestoreIndices ``` -If you need to clear the search and graph indices before restoring, add `-a clean` to the end of the command. +:::info +By default this command will not clear the search and graph indices before restoring, thous potentially leading to inconsistencies between the local database and the indices, in case aspects were previously deleted in the local database but were not removed from the correponding index. + +If you need to clear the search and graph indices before restoring, add `-a clean` to the end of the command. Please take note that the search and graph services might not be fully functional during reindexing when the indices are cleared. + +```shell +./docker/datahub-upgrade/datahub-upgrade.sh -u RestoreIndices -a clean +``` Refer to this [doc](../../docker/datahub-upgrade/README.md#environment-variables) on how to set environment variables for your environment. @@ -44,11 +59,31 @@ If not, deploy latest helm charts to use this functionality. Once restore indices job template has been deployed, run the following command to start a job that restores indices. -``` +```shell kubectl create job --from=cronjob/datahub-datahub-restore-indices-job-template datahub-restore-indices-adhoc ``` -Once the job completes, your indices will have been restored. +Once the job completes, your indices will have been restored. + +:::info +By default the restore indices job template will not clear the search and graph indices before restoring, thous potentially leading to inconsistencies between the local database and the indices, in case aspects were previously deleted in the local database but were not removed from the correponding index. + +If you need to clear the search and graph indices before restoring, modify the `values.yaml` for your deployment and overwrite the default arguments of the restore indices job template to include the `-a clean` argument. Please take note that the search and graph services might not be fully functional during reindexing when the indices are cleared. + +```yaml +datahubUpgrade: + restoreIndices: + image: + args: + - "-u" + - "RestoreIndices" + - "-a" + - "batchSize=1000" # default value of datahubUpgrade.batchSize + - "-a" + - "batchDelayMs=100" # default value of datahubUpgrade.batchDelayMs + - "-a" + - "clean" +``` ## Through API diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index abb6bcd32a554..d8a6e4c6bdca0 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -37,6 +37,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe Re-running with stateful ingestion should automatically clear up the entities with old URNS and add entities with new URNs, therefore not duplicating the containers or jobs. - #11313 - `datahub get` will no longer return a key aspect for entities that don't exist. +- #11369 - The default datahub-rest sink mode has been changed to `ASYNC_BATCH`. This requires a server with version 0.14.0+. ### Potential Downtime diff --git a/entity-registry/src/test/java/com/linkedin/metadata/models/EntitySpecBuilderTest.java b/entity-registry/src/test/java/com/linkedin/metadata/models/EntitySpecBuilderTest.java index 8b043569dd16a..3032b31cd26ec 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/models/EntitySpecBuilderTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/models/EntitySpecBuilderTest.java @@ -189,7 +189,7 @@ private void validateTestEntityInfo(final AspectSpec testEntityInfo) { testEntityInfo.getPegasusSchema().getFullName()); // Assert on Searchable Fields - assertEquals(testEntityInfo.getSearchableFieldSpecs().size(), 12); + assertEquals(testEntityInfo.getSearchableFieldSpecs().size(), 17); assertEquals( "customProperties", testEntityInfo diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py index d3727e41bb378..123b74fee74b5 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py @@ -73,7 +73,9 @@ def hookimpl(f: _F) -> _F: # type: ignore[misc] # noqa: F811 "true", "1", ) -_RUN_IN_THREAD_TIMEOUT = 30 +_RUN_IN_THREAD_TIMEOUT = float( + os.getenv("DATAHUB_AIRFLOW_PLUGIN_RUN_IN_THREAD_TIMEOUT", 15) +) _DATAHUB_CLEANUP_DAG = "Datahub_Cleanup" @@ -129,12 +131,22 @@ def wrapper(*args, **kwargs): ) thread.start() - thread.join(timeout=_RUN_IN_THREAD_TIMEOUT) - if thread.is_alive(): - logger.warning( - f"Thread for {f.__name__} is still running after {_RUN_IN_THREAD_TIMEOUT} seconds. " - "Continuing without waiting for it to finish." - ) + if _RUN_IN_THREAD_TIMEOUT > 0: + # If _RUN_IN_THREAD_TIMEOUT is 0, we just kick off the thread and move on. + # Because it's a daemon thread, it'll be automatically killed when the main + # thread exists. + + start_time = time.time() + thread.join(timeout=_RUN_IN_THREAD_TIMEOUT) + if thread.is_alive(): + logger.warning( + f"Thread for {f.__name__} is still running after {_RUN_IN_THREAD_TIMEOUT} seconds. " + "Continuing without waiting for it to finish." + ) + else: + logger.debug( + f"Thread for {f.__name__} finished after {time.time() - start_time} seconds" + ) else: f(*args, **kwargs) except Exception as e: diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json index 95f793ba69538..fd10f858d00fb 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json @@ -18,7 +18,8 @@ "timezone": "Timezone('UTC')" }, "externalUrl": "http://airflow.example.com/tree?dag_id=basic_iolets", - "name": "basic_iolets" + "name": "basic_iolets", + "env": "PROD" } } }, @@ -92,7 +93,8 @@ "name": "run_data_task", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -244,34 +246,6 @@ } } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", - "changeType": "UPSERT", - "aspectName": "dataJobInfo", - "aspect": { - "json": { - "customProperties": { - "depends_on_past": "False", - "email": "None", - "label": "'run_data_task'", - "execution_timeout": "None", - "sla": "None", - "task_id": "'run_data_task'", - "trigger_rule": "'all_success'", - "wait_for_downstream": "False", - "downstream_task_ids": "[]", - "inlets": "[]", - "outlets": "[]" - }, - "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=basic_iolets&_flt_3_task_id=run_data_task", - "name": "run_data_task", - "type": { - "string": "COMMAND" - } - } - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", @@ -572,8 +546,8 @@ "json": { "timestampMillis": 1717180290951, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 2 @@ -589,8 +563,8 @@ "json": { "timestampMillis": 1717180291140, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json index 191ea72e9f048..5c5be6848fd83 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json @@ -19,7 +19,8 @@ }, "externalUrl": "http://airflow.example.com/tree?dag_id=simple_dag", "name": "simple_dag", - "description": "A simple DAG that runs a few fake data tasks." + "description": "A simple DAG that runs a few fake data tasks.", + "env": "PROD" } } }, @@ -93,7 +94,8 @@ "name": "task_1", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -203,34 +205,6 @@ } } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", - "changeType": "UPSERT", - "aspectName": "dataJobInfo", - "aspect": { - "json": { - "customProperties": { - "depends_on_past": "False", - "email": "None", - "label": "'task_1'", - "execution_timeout": "None", - "sla": "None", - "task_id": "'task_1'", - "trigger_rule": "'all_success'", - "wait_for_downstream": "False", - "downstream_task_ids": "['run_another_data_task']", - "inlets": "[]", - "outlets": "[]" - }, - "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1", - "name": "task_1", - "type": { - "string": "COMMAND" - } - } - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", @@ -447,8 +421,8 @@ "json": { "timestampMillis": 1717180227827, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 2 @@ -464,8 +438,8 @@ "json": { "timestampMillis": 1717180228022, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -475,30 +449,6 @@ } } }, -{ - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", - "changeType": "UPSERT", - "aspectName": "dataFlowInfo", - "aspect": { - "json": { - "customProperties": { - "_access_control": "None", - "catchup": "False", - "description": "'A simple DAG that runs a few fake data tasks.'", - "doc_md": "None", - "fileloc": "", - "is_paused_upon_creation": "None", - "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", - "tags": "None", - "timezone": "Timezone('UTC')" - }, - "externalUrl": "http://airflow.example.com/tree?dag_id=simple_dag", - "name": "simple_dag", - "description": "A simple DAG that runs a few fake data tasks." - } - } -}, { "entityType": "dataFlow", "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", @@ -558,7 +508,8 @@ "name": "run_another_data_task", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -624,34 +575,6 @@ } } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", - "changeType": "UPSERT", - "aspectName": "dataJobInfo", - "aspect": { - "json": { - "customProperties": { - "depends_on_past": "False", - "email": "None", - "label": "'run_another_data_task'", - "execution_timeout": "None", - "sla": "None", - "task_id": "'run_another_data_task'", - "trigger_rule": "'all_success'", - "wait_for_downstream": "False", - "downstream_task_ids": "[]", - "inlets": "[]", - "outlets": "[]" - }, - "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task", - "name": "run_another_data_task", - "type": { - "string": "COMMAND" - } - } - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", @@ -758,8 +681,8 @@ "json": { "timestampMillis": 1717180231676, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 2 @@ -775,8 +698,8 @@ "json": { "timestampMillis": 1717180231824, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json index e2f738d19d89a..66bbb5a3608dd 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json @@ -18,7 +18,8 @@ "timezone": "Timezone('UTC')" }, "externalUrl": "http://airflow.example.com/tree?dag_id=basic_iolets", - "name": "basic_iolets" + "name": "basic_iolets", + "env": "PROD" } } }, @@ -113,13 +114,14 @@ "downstream_task_ids": "[]", "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='DEV', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableC', env='PROD', platform_instance='cloud'), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)'), Urn(_urn='urn:li:dataJob:(urn:li:dataFlow:(airflow,test_dag,PROD),test_task)')]", "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableE', env='PROD', platform_instance=None)]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"run_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"run_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=basic_iolets&_flt_3_task_id=run_data_task", "name": "run_data_task", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -423,8 +425,8 @@ "json": { "timestampMillis": 1717179624988, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 1 @@ -440,8 +442,8 @@ "json": { "timestampMillis": 1717179625524, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:airflow", "operationType": "CREATE", @@ -458,8 +460,8 @@ "json": { "timestampMillis": 1717179625547, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:airflow", "operationType": "CREATE", @@ -467,35 +469,6 @@ } } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", - "changeType": "UPSERT", - "aspectName": "dataJobInfo", - "aspect": { - "json": { - "customProperties": { - "depends_on_past": "False", - "email": "None", - "label": "'run_data_task'", - "execution_timeout": "None", - "sla": "None", - "task_id": "'run_data_task'", - "trigger_rule": "", - "wait_for_downstream": "False", - "downstream_task_ids": "[]", - "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='DEV', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableC', env='PROD', platform_instance='cloud'), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)'), Urn(_urn='urn:li:dataJob:(urn:li:dataFlow:(airflow,test_dag,PROD),test_task)')]", - "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableE', env='PROD', platform_instance=None)]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"run_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" - }, - "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=basic_iolets&_flt_3_task_id=run_data_task", - "name": "run_data_task", - "type": { - "string": "COMMAND" - } - } - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", @@ -642,8 +615,8 @@ "json": { "timestampMillis": 1717179625632, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json index 7b1591bdf7308..44312125d793e 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json @@ -18,7 +18,8 @@ "timezone": "Timezone('UTC')" }, "externalUrl": "http://airflow.example.com/tree?dag_id=basic_iolets", - "name": "basic_iolets" + "name": "basic_iolets", + "env": "PROD" } } }, @@ -113,13 +114,14 @@ "downstream_task_ids": "[]", "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='DEV', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableC', env='PROD', platform_instance='cloud'), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)'), Urn(_urn='urn:li:dataJob:(urn:li:dataFlow:(airflow,test_dag,PROD),test_task)')]", "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableE', env='PROD', platform_instance=None)]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"run_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"run_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=basic_iolets&_flt_3_task_id=run_data_task", "name": "run_data_task", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -423,8 +425,8 @@ "json": { "timestampMillis": 1717180006234, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 1 @@ -440,8 +442,8 @@ "json": { "timestampMillis": 1717180006652, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:airflow", "operationType": "CREATE", @@ -458,8 +460,8 @@ "json": { "timestampMillis": 1717180006674, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:airflow", "operationType": "CREATE", @@ -467,35 +469,6 @@ } } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", - "changeType": "UPSERT", - "aspectName": "dataJobInfo", - "aspect": { - "json": { - "customProperties": { - "depends_on_past": "False", - "email": "None", - "label": "'run_data_task'", - "execution_timeout": "None", - "sla": "None", - "task_id": "'run_data_task'", - "trigger_rule": "", - "wait_for_downstream": "False", - "downstream_task_ids": "[]", - "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='DEV', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableC', env='PROD', platform_instance='cloud'), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)'), Urn(_urn='urn:li:dataJob:(urn:li:dataFlow:(airflow,test_dag,PROD),test_task)')]", - "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableE', env='PROD', platform_instance=None)]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"run_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" - }, - "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=basic_iolets&_flt_3_task_id=run_data_task", - "name": "run_data_task", - "type": { - "string": "COMMAND" - } - } - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", @@ -642,8 +615,8 @@ "json": { "timestampMillis": 1717180006942, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag.json index b81466930ed41..2043c163d348e 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag.json @@ -19,7 +19,8 @@ }, "externalUrl": "http://airflow.example.com/tree?dag_id=custom_operator_dag", "name": "custom_operator_dag", - "description": "An example dag with custom operator" + "description": "An example dag with custom operator", + "env": "PROD" } } }, @@ -129,13 +130,14 @@ "downstream_task_ids": "[]", "inlets": "[]", "outlets": "[]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"CustomOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"custom_operator.CustomOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"custom_task_id\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"CustomOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"unusual_prefix_eab30e0aa660e38948d8dc30c6e600c764319a2e_custom_operator_dag.CustomOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"custom_task_id\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=custom_operator_dag&_flt_3_task_id=custom_task_id", "name": "custom_task_id", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -272,13 +274,14 @@ "downstream_task_ids": "[]", "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='PROD', platform_instance=None)]", "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"CustomOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"custom_operator.CustomOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"custom_task_id\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"CustomOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"unusual_prefix_eab30e0aa660e38948d8dc30c6e600c764319a2e_custom_operator_dag.CustomOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"custom_task_id\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=custom_operator_dag&_flt_3_task_id=custom_task_id", "name": "custom_task_id", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag_no_dag_listener.json index 019122600aedb..eed01f2b2d7b8 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag_no_dag_listener.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_custom_operator_dag_no_dag_listener.json @@ -19,7 +19,8 @@ }, "externalUrl": "http://airflow.example.com/tree?dag_id=custom_operator_dag", "name": "custom_operator_dag", - "description": "An example dag with custom operator" + "description": "An example dag with custom operator", + "env": "PROD" } } }, @@ -129,13 +130,14 @@ "downstream_task_ids": "[]", "inlets": "[]", "outlets": "[]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"CustomOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"mapped\": false, \"operator_class\": \"custom_operator.CustomOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"custom_task_id\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"CustomOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"mapped\": false, \"operator_class\": \"unusual_prefix_eab30e0aa660e38948d8dc30c6e600c764319a2e_custom_operator_dag.CustomOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"custom_task_id\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=custom_operator_dag&_flt_3_task_id=custom_task_id", "name": "custom_task_id", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -274,13 +276,14 @@ "downstream_task_ids": "[]", "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Dataset(platform='snowflake', name='mydb.schema.tableB', env='PROD', platform_instance=None)]", "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"CustomOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"mapped\": false, \"operator_class\": \"custom_operator.CustomOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"custom_task_id\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"CustomOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"mapped\": false, \"operator_class\": \"unusual_prefix_eab30e0aa660e38948d8dc30c6e600c764319a2e_custom_operator_dag.CustomOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"custom_task_id\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=custom_operator_dag&_flt_3_task_id=custom_task_id", "name": "custom_task_id", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json index 0828c5e5aa1f7..2cab688446617 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json @@ -19,7 +19,8 @@ }, "externalUrl": "http://airflow.example.com/tree?dag_id=simple_dag", "name": "simple_dag", - "description": "A simple DAG that runs a few fake data tasks." + "description": "A simple DAG that runs a few fake data tasks.", + "env": "PROD" } } }, @@ -114,13 +115,14 @@ "downstream_task_ids": "['run_another_data_task']", "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)'), Urn(_urn='urn:li:dataJob:(urn:li:dataFlow:(airflow,test_dag,PROD),test_task)')]", "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"['run_another_data_task']\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"task_1\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"['run_another_data_task']\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"task_1\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1", "name": "task_1", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -351,8 +353,8 @@ "json": { "timestampMillis": 1717179559032, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 1 @@ -368,8 +370,8 @@ "json": { "timestampMillis": 1717179559525, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:airflow", "operationType": "CREATE", @@ -377,35 +379,6 @@ } } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", - "changeType": "UPSERT", - "aspectName": "dataJobInfo", - "aspect": { - "json": { - "customProperties": { - "depends_on_past": "False", - "email": "None", - "label": "'task_1'", - "execution_timeout": "None", - "sla": "None", - "task_id": "'task_1'", - "trigger_rule": "", - "wait_for_downstream": "False", - "downstream_task_ids": "['run_another_data_task']", - "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)'), Urn(_urn='urn:li:dataJob:(urn:li:dataFlow:(airflow,test_dag,PROD),test_task)')]", - "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"['run_another_data_task']\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"task_1\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" - }, - "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1", - "name": "task_1", - "type": { - "string": "COMMAND" - } - } - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", @@ -510,8 +483,8 @@ "json": { "timestampMillis": 1717179559610, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -540,13 +513,14 @@ "downstream_task_ids": "[]", "inlets": "[]", "outlets": "[]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"run_another_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"['task_1']\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"run_another_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"['task_1']\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task", "name": "run_another_data_task", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -656,43 +630,14 @@ "json": { "timestampMillis": 1717179564453, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 1 } } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", - "changeType": "UPSERT", - "aspectName": "dataJobInfo", - "aspect": { - "json": { - "customProperties": { - "depends_on_past": "False", - "email": "None", - "label": "'run_another_data_task'", - "execution_timeout": "None", - "sla": "None", - "task_id": "'run_another_data_task'", - "trigger_rule": "", - "wait_for_downstream": "False", - "downstream_task_ids": "[]", - "inlets": "[]", - "outlets": "[]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"run_another_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"['task_1']\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" - }, - "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task", - "name": "run_another_data_task", - "type": { - "string": "COMMAND" - } - } - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", @@ -753,8 +698,8 @@ "json": { "timestampMillis": 1717179564937, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json index c7cd245cc0f02..b381dd55275d8 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json @@ -19,7 +19,8 @@ }, "externalUrl": "http://airflow.example.com/tree?dag_id=simple_dag", "name": "simple_dag", - "description": "A simple DAG that runs a few fake data tasks." + "description": "A simple DAG that runs a few fake data tasks.", + "env": "PROD" } } }, @@ -114,13 +115,14 @@ "downstream_task_ids": "['run_another_data_task']", "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)'), Urn(_urn='urn:li:dataJob:(urn:li:dataFlow:(airflow,test_dag,PROD),test_task)')]", "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"['run_another_data_task']\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"task_1\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"['run_another_data_task']\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"task_1\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1", "name": "task_1", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -351,8 +353,8 @@ "json": { "timestampMillis": 1717179933913, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 1 @@ -368,8 +370,8 @@ "json": { "timestampMillis": 1717179934145, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:airflow", "operationType": "CREATE", @@ -377,35 +379,6 @@ } } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", - "changeType": "UPSERT", - "aspectName": "dataJobInfo", - "aspect": { - "json": { - "customProperties": { - "depends_on_past": "False", - "email": "None", - "label": "'task_1'", - "execution_timeout": "None", - "sla": "None", - "task_id": "'task_1'", - "trigger_rule": "", - "wait_for_downstream": "False", - "downstream_task_ids": "['run_another_data_task']", - "inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)'), Urn(_urn='urn:li:dataJob:(urn:li:dataFlow:(airflow,test_dag,PROD),test_task)')]", - "outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"['run_another_data_task']\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"task_1\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" - }, - "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1", - "name": "task_1", - "type": { - "string": "COMMAND" - } - } - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", @@ -510,8 +483,8 @@ "json": { "timestampMillis": 1717179934378, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -521,30 +494,6 @@ } } }, -{ - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", - "changeType": "UPSERT", - "aspectName": "dataFlowInfo", - "aspect": { - "json": { - "customProperties": { - "_access_control": "None", - "catchup": "False", - "description": "'A simple DAG that runs a few fake data tasks.'", - "doc_md": "None", - "fileloc": "", - "is_paused_upon_creation": "None", - "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", - "tags": "[]", - "timezone": "Timezone('UTC')" - }, - "externalUrl": "http://airflow.example.com/tree?dag_id=simple_dag", - "name": "simple_dag", - "description": "A simple DAG that runs a few fake data tasks." - } - } -}, { "entityType": "dataFlow", "entityUrn": "urn:li:dataFlow:(airflow,simple_dag,prod)", @@ -599,13 +548,14 @@ "downstream_task_ids": "[]", "inlets": "[]", "outlets": "[]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"run_another_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"['task_1']\", \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"run_another_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"['task_1']\", \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task", "name": "run_another_data_task", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -715,43 +665,14 @@ "json": { "timestampMillis": 1717179938499, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 1 } } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", - "changeType": "UPSERT", - "aspectName": "dataJobInfo", - "aspect": { - "json": { - "customProperties": { - "depends_on_past": "False", - "email": "None", - "label": "'run_another_data_task'", - "execution_timeout": "None", - "sla": "None", - "task_id": "'run_another_data_task'", - "trigger_rule": "", - "wait_for_downstream": "False", - "downstream_task_ids": "[]", - "inlets": "[]", - "outlets": "[]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"unknownItems\": [{\"name\": \"BashOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"mapped\": false, \"operator_class\": \"airflow.operators.bash.BashOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"run_another_data_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"['task_1']\", \"wait_for_downstream\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" - }, - "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task", - "name": "run_another_data_task", - "type": { - "string": "COMMAND" - } - } - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),run_another_data_task)", @@ -812,8 +733,8 @@ "json": { "timestampMillis": 1717179939057, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json index b819379395a04..3f55cc717685a 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json @@ -18,7 +18,8 @@ "timezone": "Timezone('UTC')" }, "externalUrl": "http://airflow.example.com/tree?dag_id=snowflake_operator", - "name": "snowflake_operator" + "name": "snowflake_operator", + "env": "PROD" } } }, @@ -114,13 +115,14 @@ "downstream_task_ids": "[]", "inlets": "[]", "outlets": "[]", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE OR REPLACE TABLE processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n \"}" + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE OR REPLACE TABLE processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n \"}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=snowflake_operator&_flt_3_task_id=transform_cost_table", "name": "transform_cost_table", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -367,8 +369,8 @@ "json": { "timestampMillis": 1717179684292, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED", "attempt": 1 @@ -384,8 +386,8 @@ "json": { "timestampMillis": 1717179684935, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:airflow", "operationType": "CREATE", @@ -393,36 +395,6 @@ } } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)", - "changeType": "UPSERT", - "aspectName": "dataJobInfo", - "aspect": { - "json": { - "customProperties": { - "depends_on_past": "False", - "email": "None", - "label": "'transform_cost_table'", - "execution_timeout": "None", - "sla": "None", - "sql": "'\\n CREATE OR REPLACE TABLE processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n '", - "task_id": "'transform_cost_table'", - "trigger_rule": "", - "wait_for_downstream": "False", - "downstream_task_ids": "[]", - "inlets": "[]", - "outlets": "[]", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE OR REPLACE TABLE processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n \"}" - }, - "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=snowflake_operator&_flt_3_task_id=transform_cost_table", - "name": "transform_cost_table", - "type": { - "string": "COMMAND" - } - } - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)", @@ -568,8 +540,8 @@ "json": { "timestampMillis": 1717179685374, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json index 4bc34b7b0d3ce..bf94a5dd73107 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json @@ -18,7 +18,8 @@ "timezone": "Timezone('UTC')" }, "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator", - "name": "sqlite_operator" + "name": "sqlite_operator", + "env": "PROD" } } }, @@ -114,13 +115,14 @@ "downstream_task_ids": "['populate_cost_table']", "inlets": "[]", "outlets": "[]", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}" + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", "name": "create_cost_table", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -376,36 +378,6 @@ } } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", - "changeType": "UPSERT", - "aspectName": "dataJobInfo", - "aspect": { - "json": { - "customProperties": { - "depends_on_past": "False", - "email": "None", - "label": "'create_cost_table'", - "execution_timeout": "None", - "sla": "None", - "sql": "'\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n '", - "task_id": "'create_cost_table'", - "trigger_rule": "", - "wait_for_downstream": "False", - "downstream_task_ids": "['populate_cost_table']", - "inlets": "[]", - "outlets": "[]", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}" - }, - "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", - "name": "create_cost_table", - "type": { - "string": "COMMAND" - } - } - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", @@ -584,14 +556,15 @@ "inlets": "[]", "outlets": "[]", "datahub_sql_parser_error": "SqlUnderstandingError: Failed to build scope for statement - scope was empty: INSERT INTO public.costs (id, month, total_cost, area) VALUES (1, '2021-01', 100, 10), (2, '2021-02', 200, 20), (3, '2021-03', 300, 30)", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n INSERT INTO costs (id, month, total_cost, area)\\n VALUES\\n (1, '2021-01', 100, 10),\\n (2, '2021-02', 200, 20),\\n (3, '2021-03', 300, 30)\\n \"}", - "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Failed to build scope for statement - scope was empty: INSERT INTO public.costs (id, month, total_cost, area) VALUES (1, '2021-01', 100, 10), (2, '2021-02', 200, 20), (3, '2021-03', 300, 30)\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n INSERT INTO costs (id, month, total_cost, area)\\n VALUES\\n (1, '2021-01', 100, 10),\\n (2, '2021-02', 200, 20),\\n (3, '2021-03', 300, 30)\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Failed to build scope for statement - scope was empty: INSERT INTO public.costs (id, month, total_cost, area) VALUES (1, '2021-01', 100, 10), (2, '2021-02', 200, 20), (3, '2021-03', 300, 30)\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=populate_cost_table", "name": "populate_cost_table", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -768,38 +741,6 @@ } } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", - "changeType": "UPSERT", - "aspectName": "dataJobInfo", - "aspect": { - "json": { - "customProperties": { - "depends_on_past": "False", - "email": "None", - "label": "'populate_cost_table'", - "execution_timeout": "None", - "sla": "None", - "sql": "\"\\n INSERT INTO costs (id, month, total_cost, area)\\n VALUES\\n (1, '2021-01', 100, 10),\\n (2, '2021-02', 200, 20),\\n (3, '2021-03', 300, 30)\\n \"", - "task_id": "'populate_cost_table'", - "trigger_rule": "", - "wait_for_downstream": "False", - "downstream_task_ids": "['transform_cost_table']", - "inlets": "[]", - "outlets": "[]", - "datahub_sql_parser_error": "SqlUnderstandingError: Failed to build scope for statement - scope was empty: INSERT INTO public.costs (id, month, total_cost, area) VALUES (1, '2021-01', 100, 10), (2, '2021-02', 200, 20), (3, '2021-03', 300, 30)", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n INSERT INTO costs (id, month, total_cost, area)\\n VALUES\\n (1, '2021-01', 100, 10),\\n (2, '2021-02', 200, 20),\\n (3, '2021-03', 300, 30)\\n \"}", - "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Failed to build scope for statement - scope was empty: INSERT INTO public.costs (id, month, total_cost, area) VALUES (1, '2021-01', 100, 10), (2, '2021-02', 200, 20), (3, '2021-03', 300, 30)\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" - }, - "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=populate_cost_table", - "name": "populate_cost_table", - "type": { - "string": "COMMAND" - } - } - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", @@ -906,13 +847,14 @@ "downstream_task_ids": "['cleanup_costs', 'cleanup_processed_costs']", "inlets": "[]", "outlets": "[]", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n \"}" + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n \"}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=transform_cost_table", "name": "transform_cost_table", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -1187,36 +1129,6 @@ } } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", - "changeType": "UPSERT", - "aspectName": "dataJobInfo", - "aspect": { - "json": { - "customProperties": { - "depends_on_past": "False", - "email": "None", - "label": "'transform_cost_table'", - "execution_timeout": "None", - "sla": "None", - "sql": "'\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n '", - "task_id": "'transform_cost_table'", - "trigger_rule": "", - "wait_for_downstream": "False", - "downstream_task_ids": "['cleanup_costs', 'cleanup_processed_costs']", - "inlets": "[]", - "outlets": "[]", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n \"}" - }, - "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=transform_cost_table", - "name": "transform_cost_table", - "type": { - "string": "COMMAND" - } - } - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", @@ -1452,14 +1364,15 @@ "inlets": "[]", "outlets": "[]", "datahub_sql_parser_error": "UnsupportedStatementTypeError: Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n DROP TABLE costs\\n \"}", - "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n DROP TABLE costs\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_costs", "name": "cleanup_costs", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -1729,14 +1642,15 @@ "inlets": "[]", "outlets": "[]", "datahub_sql_parser_error": "UnsupportedStatementTypeError: Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n DROP TABLE processed_costs\\n \"}", - "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.12.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n DROP TABLE processed_costs\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_processed_costs", "name": "cleanup_processed_costs", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json index 99bda0e0f2569..87e76b6417ff1 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json @@ -18,7 +18,19 @@ "timezone": "Timezone('UTC')" }, "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator", - "name": "sqlite_operator" + "name": "sqlite_operator", + "env": "PROD" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false } } }, @@ -80,23 +92,19 @@ } }, { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "sqlite_operator" - } - ] + "removed": false } } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -107,7 +115,18 @@ }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -116,6 +135,21 @@ } } }, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "sqlite_operator" + } + ] + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", @@ -135,13 +169,26 @@ "wait_for_downstream": "False", "downstream_task_ids": "['populate_cost_table']", "inlets": "[]", - "outlets": "[]" + "outlets": "[]", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", "name": "create_cost_table", "type": { "string": "COMMAND" - } + }, + "env": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false } } }, @@ -198,6 +245,19 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.costs", + "origin": "PROD" + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", @@ -222,41 +282,6 @@ } } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", - "changeType": "UPSERT", - "aspectName": "datasetKey", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:sqlite", - "name": "public.costs", - "origin": "PROD" - } - } -}, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", @@ -296,7 +321,7 @@ "name": "sqlite_operator_create_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1717180072004, + "time": 1726599435467, "actor": "urn:li:corpuser:datahub" } } @@ -327,6 +352,19 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.costs", + "origin": "PROD" + } + } +}, { "entityType": "dataProcessInstance", "entityUrn": "urn:li:dataProcessInstance:fbeed1180fa0434e02ac6f75ace87869", @@ -334,7 +372,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1717180072004, + "timestampMillis": 1726599435467, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -344,6 +382,24 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1726599436099, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 1726599436099 + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", @@ -363,13 +419,115 @@ "wait_for_downstream": "False", "downstream_task_ids": "['populate_cost_table']", "inlets": "[]", - "outlets": "[]" + "outlets": "[]", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", "name": "create_cost_table", "type": { "string": "COMMAND" - } + }, + "env": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + } + ] } } }, @@ -377,17 +535,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", "changeType": "UPSERT", - "aspectName": "operation", + "aspectName": "datasetKey", "aspect": { "json": { - "timestampMillis": 1719864194882, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "actor": "urn:li:corpuser:airflow", - "operationType": "CREATE", - "lastUpdatedTimestamp": 1719864194882 + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.costs", + "origin": "PROD" } } }, @@ -433,7 +586,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1717180072275, + "timestampMillis": 1726599436722, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -465,7 +618,19 @@ "timezone": "Timezone('UTC')" }, "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator", - "name": "sqlite_operator" + "name": "sqlite_operator", + "env": "PROD" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false } } }, @@ -504,6 +669,87 @@ } } }, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "sqlite_operator" + } + ] + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", @@ -523,13 +769,28 @@ "wait_for_downstream": "False", "downstream_task_ids": "['transform_cost_table']", "inlets": "[]", - "outlets": "[]" + "outlets": "[]", + "datahub_sql_parser_error": "SqlUnderstandingError: Failed to build scope for statement - scope was empty: INSERT INTO public.costs (id, month, total_cost, area) VALUES (1, '2021-01', 100, 10), (2, '2021-02', 200, 20), (3, '2021-03', 300, 30)", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n INSERT INTO costs (id, month, total_cost, area)\\n VALUES\\n (1, '2021-01', 100, 10),\\n (2, '2021-02', 200, 20),\\n (3, '2021-03', 300, 30)\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Failed to build scope for statement - scope was empty: INSERT INTO public.costs (id, month, total_cost, area) VALUES (1, '2021-01', 100, 10), (2, '2021-02', 200, 20), (3, '2021-03', 300, 30)\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=populate_cost_table", "name": "populate_cost_table", "type": { "string": "COMMAND" - } + }, + "env": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false } } }, @@ -551,6 +812,19 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.costs", + "origin": "PROD" + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", @@ -614,7 +888,7 @@ "name": "sqlite_operator_populate_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1717180078196, + "time": 1726599440750, "actor": "urn:li:corpuser:datahub" } } @@ -632,6 +906,32 @@ } } }, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.costs", + "origin": "PROD" + } + } +}, { "entityType": "dataProcessInstance", "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92", @@ -639,7 +939,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1717180078196, + "timestampMillis": 1726599440750, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -649,6 +949,24 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1726599441965, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 1726599441965 + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", @@ -668,13 +986,59 @@ "wait_for_downstream": "False", "downstream_task_ids": "['transform_cost_table']", "inlets": "[]", - "outlets": "[]" + "outlets": "[]", + "datahub_sql_parser_error": "SqlUnderstandingError: Failed to build scope for statement - scope was empty: INSERT INTO public.costs (id, month, total_cost, area) VALUES (1, '2021-01', 100, 10), (2, '2021-02', 200, 20), (3, '2021-03', 300, 30)", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n INSERT INTO costs (id, month, total_cost, area)\\n VALUES\\n (1, '2021-01', 100, 10),\\n (2, '2021-02', 200, 20),\\n (3, '2021-03', 300, 30)\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Failed to build scope for statement - scope was empty: INSERT INTO public.costs (id, month, total_cost, area) VALUES (1, '2021-01', 100, 10), (2, '2021-02', 200, 20), (3, '2021-03', 300, 30)\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=populate_cost_table", "name": "populate_cost_table", "type": { "string": "COMMAND" - } + }, + "env": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.costs", + "origin": "PROD" } } }, @@ -720,7 +1084,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1717180078619, + "timestampMillis": 1726599442384, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -752,7 +1116,19 @@ "timezone": "Timezone('UTC')" }, "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator", - "name": "sqlite_operator" + "name": "sqlite_operator", + "env": "PROD" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false } } }, @@ -791,45 +1167,139 @@ } } }, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", "changeType": "UPSERT", - "aspectName": "dataJobInfo", + "aspectName": "status", "aspect": { "json": { - "customProperties": { - "depends_on_past": "False", - "email": "None", - "label": "'transform_cost_table'", - "execution_timeout": "None", - "sla": "None", - "sql": "'\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n '", - "task_id": "'transform_cost_table'", - "trigger_rule": "", - "wait_for_downstream": "False", - "downstream_task_ids": "['cleanup_costs', 'cleanup_processed_costs']", - "inlets": "[]", - "outlets": "[]" - }, - "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=transform_cost_table", - "name": "transform_cost_table", - "type": { - "string": "COMMAND" - } + "removed": false } } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", "changeType": "UPSERT", - "aspectName": "dataJobInputOutput", + "aspectName": "status", "aspect": { "json": { - "inputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" - ], + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "sqlite_operator" + } + ] + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'transform_cost_table'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n '", + "task_id": "'transform_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['cleanup_costs', 'cleanup_processed_costs']", + "inlets": "[]", + "outlets": "[]", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n \"}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=transform_cost_table", + "name": "transform_cost_table", + "type": { + "string": "COMMAND" + }, + "env": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], "outputDatasets": [ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" ], @@ -897,6 +1367,32 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.costs", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.processed_costs", + "origin": "PROD" + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", @@ -932,19 +1428,6 @@ } } }, -{ - "entityType": "dataProcessInstance", - "entityUrn": "urn:li:dataProcessInstance:04e1badac1eacd1c41123d07f579fa92", - "changeType": "UPSERT", - "aspectName": "dataProcessInstanceOutput", - "aspect": { - "json": { - "outputs": [ - "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" - ] - } - } -}, { "entityType": "dataProcessInstance", "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", @@ -973,7 +1456,7 @@ "name": "sqlite_operator_transform_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1717180084642, + "time": 1726599448285, "actor": "urn:li:corpuser:datahub" } } @@ -992,48 +1475,290 @@ } }, { - "entityType": "dataProcessInstance", - "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.costs", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.processed_costs", + "origin": "PROD" + } + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1726599448285, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "status": "STARTED", + "attempt": 1 + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1726599450484, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 1726599450484 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'transform_cost_table'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n '", + "task_id": "'transform_cost_table'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "['cleanup_costs', 'cleanup_processed_costs']", + "inlets": "[]", + "outlets": "[]", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n \"}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=transform_cost_table", + "name": "transform_cost_table", + "type": { + "string": "COMMAND" + }, + "env": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" + ], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)" + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),cost_per_area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD),cost_per_area)" + ], + "confidenceScore": 1.0 + } + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", "changeType": "UPSERT", - "aspectName": "dataProcessInstanceRunEvent", + "aspectName": "datasetKey", "aspect": { "json": { - "timestampMillis": 1717180084642, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "status": "STARTED", - "attempt": 1 + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.costs", + "origin": "PROD" } } }, { - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", "changeType": "UPSERT", - "aspectName": "dataJobInfo", + "aspectName": "datasetKey", "aspect": { "json": { - "customProperties": { - "depends_on_past": "False", - "email": "None", - "label": "'transform_cost_table'", - "execution_timeout": "None", - "sla": "None", - "sql": "'\\n CREATE TABLE IF NOT EXISTS processed_costs AS\\n SELECT\\n id,\\n month,\\n total_cost,\\n area,\\n total_cost / area as cost_per_area\\n FROM costs\\n '", - "task_id": "'transform_cost_table'", - "trigger_rule": "", - "wait_for_downstream": "False", - "downstream_task_ids": "['cleanup_costs', 'cleanup_processed_costs']", - "inlets": "[]", - "outlets": "[]" - }, - "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=transform_cost_table", - "name": "transform_cost_table", - "type": { - "string": "COMMAND" - } + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.processed_costs", + "origin": "PROD" } } }, @@ -1079,7 +1804,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1717180085266, + "timestampMillis": 1726599450950, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -1111,7 +1836,19 @@ "timezone": "Timezone('UTC')" }, "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator", - "name": "sqlite_operator" + "name": "sqlite_operator", + "env": "PROD" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false } } }, @@ -1150,6 +1887,87 @@ } } }, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "sqlite_operator" + } + ] + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", @@ -1169,13 +1987,28 @@ "wait_for_downstream": "False", "downstream_task_ids": "[]", "inlets": "[]", - "outlets": "[]" + "outlets": "[]", + "datahub_sql_parser_error": "UnsupportedStatementTypeError: Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n DROP TABLE costs\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_costs", "name": "cleanup_costs", "type": { "string": "COMMAND" - } + }, + "env": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false } } }, @@ -1197,6 +2030,19 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.costs", + "origin": "PROD" + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", @@ -1260,7 +2106,7 @@ "name": "sqlite_operator_cleanup_costs_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1717180091148, + "time": 1726599457411, "actor": "urn:li:corpuser:datahub" } } @@ -1278,6 +2124,32 @@ } } }, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.costs", + "origin": "PROD" + } + } +}, { "entityType": "dataProcessInstance", "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a", @@ -1285,7 +2157,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1717180091148, + "timestampMillis": 1726599457411, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -1296,31 +2168,95 @@ } }, { - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1726599460313, + "partitionSpec": { + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 1726599460313 + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "depends_on_past": "False", + "email": "None", + "label": "'cleanup_costs'", + "execution_timeout": "None", + "sla": "None", + "sql": "'\\n DROP TABLE costs\\n '", + "task_id": "'cleanup_costs'", + "trigger_rule": "", + "wait_for_downstream": "False", + "downstream_task_ids": "[]", + "inlets": "[]", + "outlets": "[]", + "datahub_sql_parser_error": "UnsupportedStatementTypeError: Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n DROP TABLE costs\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + }, + "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_costs", + "name": "cleanup_costs", + "type": { + "string": "COMMAND" + }, + "env": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" + ], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", "changeType": "UPSERT", - "aspectName": "dataJobInfo", + "aspectName": "datasetKey", "aspect": { "json": { - "customProperties": { - "depends_on_past": "False", - "email": "None", - "label": "'cleanup_costs'", - "execution_timeout": "None", - "sla": "None", - "sql": "'\\n DROP TABLE costs\\n '", - "task_id": "'cleanup_costs'", - "trigger_rule": "", - "wait_for_downstream": "False", - "downstream_task_ids": "[]", - "inlets": "[]", - "outlets": "[]" - }, - "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_costs", - "name": "cleanup_costs", - "type": { - "string": "COMMAND" - } + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.costs", + "origin": "PROD" } } }, @@ -1366,7 +2302,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1717180091923, + "timestampMillis": 1726599460969, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -1379,19 +2315,6 @@ } } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", - "changeType": "UPSERT", - "aspectName": "datasetKey", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:sqlite", - "name": "public.processed_costs", - "origin": "PROD" - } - } -}, { "entityType": "dataFlow", "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", @@ -1411,7 +2334,19 @@ "timezone": "Timezone('UTC')" }, "externalUrl": "http://airflow.example.com/tree?dag_id=sqlite_operator", - "name": "sqlite_operator" + "name": "sqlite_operator", + "env": "PROD" + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false } } }, @@ -1450,6 +2385,87 @@ } } }, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_costs)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(airflow,sqlite_operator,prod)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "sqlite_operator" + } + ] + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", @@ -1469,26 +2485,28 @@ "wait_for_downstream": "False", "downstream_task_ids": "[]", "inlets": "[]", - "outlets": "[]" + "outlets": "[]", + "datahub_sql_parser_error": "UnsupportedStatementTypeError: Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n DROP TABLE processed_costs\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_processed_costs", "name": "cleanup_processed_costs", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, { - "entityType": "dataProcessInstance", - "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", "changeType": "UPSERT", - "aspectName": "dataProcessInstanceInput", + "aspectName": "status", "aspect": { "json": { - "inputs": [ - "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" - ] + "removed": false } } }, @@ -1511,15 +2529,15 @@ } }, { - "entityType": "dataProcessInstance", - "entityUrn": "urn:li:dataProcessInstance:64e5ff8f552e857b607832731e09808b", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", "changeType": "UPSERT", - "aspectName": "dataProcessInstanceOutput", + "aspectName": "datasetKey", "aspect": { "json": { - "outputs": [ - "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" - ] + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.processed_costs", + "origin": "PROD" } } }, @@ -1586,7 +2604,7 @@ "name": "sqlite_operator_cleanup_processed_costs_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1717180096108, + "time": 1726599466166, "actor": "urn:li:corpuser:datahub" } } @@ -1604,6 +2622,32 @@ } } }, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.processed_costs", + "origin": "PROD" + } + } +}, { "entityType": "dataProcessInstance", "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372", @@ -1611,7 +2655,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1717180096108, + "timestampMillis": 1726599466166, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -1628,14 +2672,14 @@ "aspectName": "operation", "aspect": { "json": { - "timestampMillis": 1719864203487, + "timestampMillis": 1726599469525, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:airflow", "operationType": "CREATE", - "lastUpdatedTimestamp": 1719864203487 + "lastUpdatedTimestamp": 1726599469525 } } }, @@ -1658,13 +2702,59 @@ "wait_for_downstream": "False", "downstream_task_ids": "[]", "inlets": "[]", - "outlets": "[]" + "outlets": "[]", + "datahub_sql_parser_error": "UnsupportedStatementTypeError: Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n DROP TABLE processed_costs\\n \"}", + "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=cleanup_processed_costs", "name": "cleanup_processed_costs", "type": { "string": "COMMAND" - } + }, + "env": "PROD" + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),cleanup_processed_costs)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" + ], + "inputDatajobs": [ + "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)" + ], + "fineGrainedLineages": [] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sqlite", + "name": "public.processed_costs", + "origin": "PROD" } } }, @@ -1710,7 +2800,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1717180096993, + "timestampMillis": 1726599470251, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -1722,31 +2812,5 @@ } } } -}, -{ - "entityType": "dataProcessInstance", - "entityUrn": "urn:li:dataProcessInstance:bab908abccf3cd6607b50fdaf3003372", - "changeType": "UPSERT", - "aspectName": "dataProcessInstanceOutput", - "aspect": { - "json": { - "outputs": [ - "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)" - ] - } - } -}, -{ - "entityType": "dataProcessInstance", - "entityUrn": "urn:li:dataProcessInstance:07285de22276959612189d51336cc21a", - "changeType": "UPSERT", - "aspectName": "dataProcessInstanceOutput", - "aspect": { - "json": { - "outputs": [ - "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" - ] - } - } } ] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py index 2b8d4c47f6224..784176a3b7f4e 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py @@ -165,6 +165,10 @@ def _run_airflow( conn_type="sqlite", host=str(tmp_path / "my_sqlite.db"), ).get_uri(), + # Ensure that the plugin waits for metadata to be written. + # Note that we could also disable the RUN_IN_THREAD entirely, + # but I want to minimize the difference between CI and prod. + "DATAHUB_AIRFLOW_PLUGIN_RUN_IN_THREAD_TIMEOUT": "30", # Convenience settings. "AIRFLOW__DATAHUB__LOG_LEVEL": "DEBUG", "AIRFLOW__DATAHUB__DEBUG_EMITTER": "True", diff --git a/metadata-ingestion/sink_docs/datahub.md b/metadata-ingestion/sink_docs/datahub.md index 8ddcf6aff1035..a60356fc3789f 100644 --- a/metadata-ingestion/sink_docs/datahub.md +++ b/metadata-ingestion/sink_docs/datahub.md @@ -29,6 +29,7 @@ sink: ``` If you are connecting to a hosted DataHub Cloud instance, your sink will look like + ```yml source: # source configs @@ -68,16 +69,17 @@ If you are using [UI based ingestion](../../docs/ui-ingestion.md) then where GMS Note that a `.` is used to denote nested fields in the YAML recipe. | Field | Required | Default | Description | -|----------------------------|----------|----------------------|----------------------------------------------------------------------------------------------------| -| `server` | ✅ | | URL of DataHub GMS endpoint. | +| -------------------------- | -------- | -------------------- | -------------------------------------------------------------------------------------------------- | +| `server` | ✅ | | URL of DataHub GMS endpoint. | +| `token` | | | Bearer token used for authentication. | | `timeout_sec` | | 30 | Per-HTTP request timeout. | | `retry_max_times` | | 1 | Maximum times to retry if HTTP request fails. The delay between retries is increased exponentially | | `retry_status_codes` | | [429, 502, 503, 504] | Retry HTTP request also on these status codes | -| `token` | | | Bearer token used for authentication. | | `extra_headers` | | | Extra headers which will be added to the request. | -| `max_threads` | | `15` | Experimental: Max parallelism for REST API calls | -| `ca_certificate_path` | | | Path to server's CA certificate for verification of HTTPS communications | -| `client_certificate_path` | | | Path to client's CA certificate for HTTPS communications | +| `max_threads` | | `15` | Max parallelism for REST API calls | +| `mode` | | `ASYNC_BATCH` | [Advanced] Mode of operation - `SYNC`, `ASYNC`, or `ASYNC_BATCH` | +| `ca_certificate_path` | | | Path to server's CA certificate for verification of HTTPS communications | +| `client_certificate_path` | | | Path to client's CA certificate for HTTPS communications | | `disable_ssl_verification` | | false | Disable ssl certificate validation | ## DataHub Kafka @@ -115,14 +117,14 @@ sink: Note that a `.` is used to denote nested fields in the YAML recipe. -| Field | Required | Default | Description | -| -------------------------------------------- | -------- | ------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `connection.bootstrap` | ✅ | | Kafka bootstrap URL. | -| `connection.producer_config.