From ec062b6787dbf3a4285cc571014b939e778c0fa3 Mon Sep 17 00:00:00 2001
From: Gabe Lyons
Date: Mon, 7 Feb 2022 14:29:51 -0800
Subject: [PATCH 1/6] feat(glue): make ownership configurable in glue source (#4078)

---
 metadata-ingestion/source_docs/glue.md        |  1 +
 .../src/datahub/ingestion/source/aws/glue.py  | 20 ++++++++++++-------
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/metadata-ingestion/source_docs/glue.md b/metadata-ingestion/source_docs/glue.md
index a4ddedfb50be8..27f5074f9775f 100644
--- a/metadata-ingestion/source_docs/glue.md
+++ b/metadata-ingestion/source_docs/glue.md
@@ -92,6 +92,7 @@ Note that a `.` is used to denote nested fields in the YAML recipe.
 | `ignore_unsupported_connectors` | | `True` | Whether to ignore unsupported connectors. If disabled, an error will be raised. |
 | `emit_s3_lineage` | | `True` | Whether to emit S3-to-Glue lineage. |
 | `glue_s3_lineage_direction` | | `upstream` | If `upstream`, S3 is upstream to Glue. If `downstream` S3 is downstream to Glue. |
+| `extract_owners` | | `True` | When enabled, extracts ownership from Glue directly and overwrites existing owners. When disabled, ownership is left empty for datasets. |

 ## Compatibility

diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py
index d91a5f2893408..1491992469983 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py
@@ -48,6 +48,7 @@


 class GlueSourceConfig(AwsSourceConfig):
+    extract_owners: Optional[bool] = True
     extract_transforms: Optional[bool] = True
     underlying_platform: Optional[str] = None
     ignore_unsupported_connectors: Optional[bool] = True
@@ -89,6 +90,7 @@ class GlueSource(Source):

     def __init__(self, config: GlueSourceConfig, ctx: PipelineContext):
         super().__init__(ctx)
+        self.extract_owners = config.extract_owners
         self.source_config = config
         self.report = GlueSourceReport()
         self.glue_client = config.glue_client
@@ -612,7 +614,7 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]:
             yield dataset_wu

     def _extract_record(self, table: Dict, table_name: str) -> MetadataChangeEvent:
-        def get_owner() -> OwnershipClass:
+        def get_owner() -> Optional[OwnershipClass]:
             owner = table.get("Owner")
             if owner:
                 owners = [
@@ -621,11 +623,10 @@ def get_owner() -> OwnershipClass:
                         type=OwnershipTypeClass.DATAOWNER,
                     )
                 ]
-            else:
-                owners = []
-            return OwnershipClass(
-                owners=owners,
-            )
+                return OwnershipClass(
+                    owners=owners,
+                )
+            return None

         def get_dataset_properties() -> DatasetPropertiesClass:
             return DatasetPropertiesClass(
@@ -680,7 +681,12 @@ def get_schema_metadata(glue_source: GlueSource) -> SchemaMetadata:
         )

         dataset_snapshot.aspects.append(Status(removed=False))
-        dataset_snapshot.aspects.append(get_owner())
+
+        if self.extract_owners:
+            optional_owner_aspect = get_owner()
+            if optional_owner_aspect is not None:
+                dataset_snapshot.aspects.append(optional_owner_aspect)
+
         dataset_snapshot.aspects.append(get_dataset_properties())
         dataset_snapshot.aspects.append(get_schema_metadata(self))
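
The new `extract_owners` flag is driven from an ordinary ingestion recipe. A minimal sketch, assuming a locally running DataHub instance and placeholder AWS settings; only `extract_owners` itself comes from this patch, the other fields follow the existing Glue source docs:

```python
# Hypothetical recipe: disable Glue ownership extraction so that owners
# curated in DataHub are not overwritten on every ingestion run.
from datahub.ingestion.run.pipeline import Pipeline

pipeline = Pipeline.create(
    {
        "source": {
            "type": "glue",
            "config": {
                "aws_region": "us-west-2",  # placeholder region
                "extract_owners": False,    # new flag added by this patch
            },
        },
        "sink": {
            "type": "datahub-rest",
            "config": {"server": "http://localhost:8080"},
        },
    }
)
pipeline.run()
pipeline.raise_from_status()
```
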
From 60c17a2ba7fad598bca9483a7539623dfda6e490 Mon Sep 17 00:00:00 2001
From: Dexter Lee
Date: Mon, 7 Feb 2022 19:58:16 -0800
Subject: [PATCH 2/6] fix(ingest): datahub-rest - retry on POST for emitter (#4081)

Co-authored-by: Shirshanka Das
---
 metadata-ingestion/src/datahub/emitter/rest_emitter.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/metadata-ingestion/src/datahub/emitter/rest_emitter.py b/metadata-ingestion/src/datahub/emitter/rest_emitter.py
index ddd9003a610cf..ee966e6bb8954 100644
--- a/metadata-ingestion/src/datahub/emitter/rest_emitter.py
+++ b/metadata-ingestion/src/datahub/emitter/rest_emitter.py
@@ -50,7 +50,8 @@ class DatahubRestEmitter:
         503,
         504,
     ]
-    DEFAULT_RETRY_MAX_TIMES = 1
+    DEFAULT_RETRY_METHODS = ["HEAD", "GET", "POST", "PUT", "DELETE", "OPTIONS", "TRACE"]
+    DEFAULT_RETRY_MAX_TIMES = 3

     _gms_server: str
     _token: Optional[str]
@@ -58,6 +59,7 @@ class DatahubRestEmitter:
     _connect_timeout_sec: float = DEFAULT_CONNECT_TIMEOUT_SEC
     _read_timeout_sec: float = DEFAULT_READ_TIMEOUT_SEC
     _retry_status_codes: List[int] = DEFAULT_RETRY_STATUS_CODES
+    _retry_methods: List[str] = DEFAULT_RETRY_METHODS
     _retry_max_times: int = DEFAULT_RETRY_MAX_TIMES

     def __init__(
@@ -67,6 +69,7 @@ def __init__(
         connect_timeout_sec: Optional[float] = None,
         read_timeout_sec: Optional[float] = None,
         retry_status_codes: Optional[List[int]] = None,
+        retry_methods: Optional[List[str]] = None,
        retry_max_times: Optional[int] = None,
         extra_headers: Optional[Dict[str, str]] = None,
         ca_certificate_path: Optional[str] = None,
@@ -105,6 +108,9 @@ def __init__(
         if retry_status_codes is not None:  # Only if missing. Empty list is allowed
             self._retry_status_codes = retry_status_codes

+        if retry_methods is not None:
+            self._retry_methods = retry_methods
+
         if retry_max_times:
             self._retry_max_times = retry_max_times

@@ -112,6 +118,7 @@ def __init__(
             total=self._retry_max_times,
             status_forcelist=self._retry_status_codes,
             backoff_factor=2,
+            allowed_methods=self._retry_methods,
         )

         adapter = HTTPAdapter(
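
With this change the emitter retries POST requests up to three times with exponential backoff instead of failing after a single attempt. A minimal sketch of tuning the new knobs from client code, assuming a GMS reachable at the given URL; the argument names come from the constructor signature shown above:

```python
from datahub.emitter.rest_emitter import DatahubRestEmitter

# Retry only on throttling / gateway errors, and allow POSTs to be retried.
emitter = DatahubRestEmitter(
    gms_server="http://localhost:8080",
    retry_status_codes=[429, 502, 503, 504],
    retry_methods=["GET", "POST"],
    retry_max_times=3,
)
emitter.test_connection()  # raises if the server cannot be reached after retries
```

Retrying POST makes sense here because metadata emission is idempotent on the server side; callers that do not want POST retries can pass a narrower `retry_methods` list.
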
From 2de29dc623854a35cd808d789ad9ccec22dbfa54 Mon Sep 17 00:00:00 2001
From: Tamas Nemeth
Date: Tue, 8 Feb 2022 19:09:30 +0100
Subject: [PATCH 3/6] feat(ingest) - bigquery: More verbose and faster lineage generation and option to set partition datetime for profiling (#4079)

---
 .../source_docs/sql_profiles.md               | 48 ++++++++++---------
 .../ingestion/source/ge_profiling_config.py   |  2 +
 .../datahub/ingestion/source/sql/bigquery.py  | 33 +++++++++----
 .../ingestion/source/sql/sql_common.py        |  5 +-
 4 files changed, 53 insertions(+), 35 deletions(-)

diff --git a/metadata-ingestion/source_docs/sql_profiles.md b/metadata-ingestion/source_docs/sql_profiles.md
index e59a0d45a3be5..c380e96b6ade1 100644
--- a/metadata-ingestion/source_docs/sql_profiles.md
+++ b/metadata-ingestion/source_docs/sql_profiles.md
@@ -69,30 +69,32 @@ sink:

 Note that a `.` is used to denote nested fields in the YAML recipe.

-| Field | Required | Default | Description |
-| -------------------------------------------------- | -------- |----------------------|----------------------------------------|
-| `profiling.enabled` | | `False` | Whether profiling should be done. |
-| `profiling.bigquery_temp_table_schema` | | | On bigquery for profiling partitioned tables needs to create temporary views. You have to define a schema where these will be created. Views will be cleaned up after profiler runs. (Great expectation tech details about this [here](https://legacy.docs.greatexpectations.io/en/0.9.0/reference/integrations/bigquery.html#custom-queries-with-sql-datasource). |
-| `profiling.limit` | | | Max number of documents to profile. By default, profiles all documents. |
-| `profiling.offset` | | | Offset in documents to profile. By default, uses no offset. |
-| `profiling.max_workers` | | `5 * os.cpu_count()` | Number of worker threads to use for profiling. Set to 1 to disable. |
-| `profiling.query_combiner_enabled` | | `True` | *This feature is still experimental and can be disabled if it causes issues.* Reduces the total number of queries issued and speeds up profiling by dynamically combining SQL queries where possible. |
-| `profile_pattern.allow` | | `*` | List of regex patterns for tables or table columns to profile. Defaults to all. |
-| `profile_pattern.deny` | | | List of regex patterns for tables or table columns to not profile. Defaults to none. |
-| `profile_pattern.ignoreCase` | | `True` | Whether to ignore case sensitivity during pattern matching. |
-| `profiling.turn_off_expensive_profiling_metrics` | | False | Whether to turn off expensive profiling or not. This turns off profiling for quantiles, distinct_value_frequencies, histogram & sample_values. This also limits maximum number of fields being profiled to 10. |
-| `profiling.max_number_of_fields_to_profile` | | `None` | A positive integer that specifies the maximum number of columns to profile for any table. `None` implies all columns. The cost of profiling goes up significantly as the number of columns to profile goes up. |
-| `profiling.profile_table_level_only` | | False | Whether to perform profiling at table-level only, or include column-level profiling as well. |
-| `profiling.include_field_null_count` | | `True` | Whether to profile for the number of nulls for each column. |
-| `profiling.include_field_min_value` | | `True` | Whether to profile for the min value of numeric columns. |
-| `profiling.include_field_max_value` | | `True` | Whether to profile for the max value of numeric columns. |
-| `profiling.include_field_mean_value` | | `True` | Whether to profile for the mean value of numeric columns. |
-| `profiling.include_field_median_value` | | `True` | Whether to profile for the median value of numeric columns. |
-| `profiling.include_field_stddev_value` | | `True` | Whether to profile for the standard deviation of numeric columns. |
-| `profiling.include_field_quantiles` | | `False` | Whether to profile for the quantiles of numeric columns. |
-| `profiling.include_field_distinct_value_frequencies` | | `False` | Whether to profile for distinct value frequencies. |
-| `profiling.include_field_histogram` | | `False` | Whether to profile for the histogram for numeric fields. |
-| `profiling.include_field_sample_values` | | `True` | Whether to profile for the sample values for all columns. |
+| Field | Required | Default | Description |
+|------------------------------------------------------|----------|----------------------|----------------------------------------|
+| `profiling.enabled` | | `False` | Whether profiling should be done. |
+| `profiling.bigquery_temp_table_schema` | | | On BigQuery, profiling partitioned tables requires creating temporary views, so you must define a schema where these views will be created. Views are cleaned up after the profiler runs. (Great Expectations technical details are [here](https://legacy.docs.greatexpectations.io/en/0.9.0/reference/integrations/bigquery.html#custom-queries-with-sql-datasource).) |
+| `profiling.limit` | | | Max number of documents to profile. By default, profiles all documents. |
+| `profiling.offset` | | | Offset in documents to profile. By default, uses no offset. |
+| `profiling.max_workers` | | `5 * os.cpu_count()` | Number of worker threads to use for profiling. Set to 1 to disable. |
+| `profiling.query_combiner_enabled` | | `True` | *This feature is still experimental and can be disabled if it causes issues.* Reduces the total number of queries issued and speeds up profiling by dynamically combining SQL queries where possible. |
+| `profile_pattern.allow` | | `*` | List of regex patterns for tables or table columns to profile. Defaults to all. |
+| `profile_pattern.deny` | | | List of regex patterns for tables or table columns to not profile. Defaults to none. |
+| `profile_pattern.ignoreCase` | | `True` | Whether to ignore case sensitivity during pattern matching. |
+| `profiling.turn_off_expensive_profiling_metrics` | | False | Whether to turn off expensive profiling or not. This turns off profiling for quantiles, distinct_value_frequencies, histogram & sample_values. This also limits maximum number of fields being profiled to 10. |
+| `profiling.max_number_of_fields_to_profile` | | `None` | A positive integer that specifies the maximum number of columns to profile for any table. `None` implies all columns. The cost of profiling goes up significantly as the number of columns to profile goes up. |
+| `profiling.profile_table_level_only` | | False | Whether to perform profiling at table-level only, or include column-level profiling as well. |
+| `profiling.include_field_null_count` | | `True` | Whether to profile for the number of nulls for each column. |
+| `profiling.include_field_min_value` | | `True` | Whether to profile for the min value of numeric columns. |
+| `profiling.include_field_max_value` | | `True` | Whether to profile for the max value of numeric columns. |
+| `profiling.include_field_mean_value` | | `True` | Whether to profile for the mean value of numeric columns. |
+| `profiling.include_field_median_value` | | `True` | Whether to profile for the median value of numeric columns. |
+| `profiling.include_field_stddev_value` | | `True` | Whether to profile for the standard deviation of numeric columns. |
+| `profiling.include_field_quantiles` | | `False` | Whether to profile for the quantiles of numeric columns. |
+| `profiling.include_field_distinct_value_frequencies` | | `False` | Whether to profile for distinct value frequencies. |
+| `profiling.include_field_histogram` | | `False` | Whether to profile for the histogram for numeric fields. |
+| `profiling.include_field_sample_values` | | `True` | Whether to profile for the sample values for all columns. |
+| `profiling.partition_datetime` | | | For partitioned datasets, profile only the partition that matches this datetime; if not set, the latest partition is profiled. Only BigQuery supports this. |
+
 ## Compatibility

 Coming soon!
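
The new `profiling.partition_datetime` option can be pinned in a recipe so that repeated profiling runs target the same BigQuery partition. A minimal sketch, assuming a placeholder GCP project and a console sink; every field other than the two profiling options documented above is illustrative:

```python
from datahub.ingestion.run.pipeline import Pipeline

pipeline = Pipeline.create(
    {
        "source": {
            "type": "bigquery",
            "config": {
                "project_id": "my-gcp-project",  # placeholder project
                "profiling": {
                    "enabled": True,
                    # Profile exactly this partition instead of deriving the
                    # timestamp from the latest partition id.
                    "partition_datetime": "2022-02-01 00:00:00",
                    "bigquery_temp_table_schema": "my-gcp-project.profiling_tmp",
                },
            },
        },
        "sink": {"type": "console"},
    }
)
pipeline.run()
```
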
diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py
index 657195916e114..974d622090da3 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py
@@ -1,3 +1,4 @@
+import datetime
 import os
 from typing import Any, Dict, List, Optional

@@ -43,6 +44,7 @@ class GEProfilingConfig(ConfigModel):
     catch_exceptions: bool = True

     bigquery_temp_table_schema: Optional[str] = None
+    partition_datetime: Optional[datetime.datetime]

     @pydantic.root_validator()
     def ensure_field_level_settings_are_normalized(

diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py
index 7e2afe1306f9c..1cae97f2ad2a9 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py
@@ -69,6 +69,10 @@
             protoPayload.serviceData.jobCompletedEvent.job.jobStatus.state="DONE"
             AND NOT
             protoPayload.serviceData.jobCompletedEvent.job.jobStatus.error.code:*
+            AND
+            protoPayload.serviceData.jobCompletedEvent.job.jobConfiguration.query.destinationTable.datasetId !~ "^_.*"
+            AND
+            protoPayload.serviceData.jobCompletedEvent.job.jobStatistics.referencedTables:*
         )
     )
     AND
@@ -95,7 +99,7 @@
 where
     is_partitioning_column = 'YES'
     -- Filter out special partitions (https://cloud.google.com/bigquery/docs/partitioned-tables#date_timestamp_partitioned_tables)
-    and p.partition_id not in ('__NULL__', '__UNPARTITIONED__')
+    and p.partition_id not in ('__NULL__', '__UNPARTITIONED__', '__STREAMING_UNPARTITIONED__')
     and STORAGE_TIER='ACTIVE'
     and p.table_name= '{table}'
 group by
@@ -370,12 +374,20 @@ def _get_bigquery_log_entries(
             ),
         )

-        logger.debug("Start loading log entries from BigQuery")
+        assert self.config.log_page_size is not None
+
+        logger.info("Start loading log entries from BigQuery")
         for client in clients:
-            yield from client.list_entries(
+            entries = client.list_entries(
                 filter_=filter, page_size=self.config.log_page_size
             )
-        logger.debug("finished loading log entries from BigQuery")
+            item = 0
+            for entry in entries:
+                item = item + 1
+                if item % self.config.log_page_size == 0:
+                    logger.info(f"Read {item} entry from log entries")
+                yield entry
+        logger.info(f"Finished loading {item} log entries from BigQuery")

     def _get_exported_bigquery_audit_metadata(
         self, bigquery_client: BigQueryClient
@@ -391,7 +403,7 @@ def _get_exported_bigquery_audit_metadata(
         ).strftime(BQ_DATETIME_FORMAT)

         for dataset in self.config.bigquery_audit_metadata_datasets:
-            logger.debug(
+            logger.info(
                 f"Start loading log entries from BigQueryAuditMetadata in {dataset}"
             )

@@ -418,7 +430,7 @@ def _get_exported_bigquery_audit_metadata(
             ).format(start_time=start_time, end_time=end_time)

             query_job = bigquery_client.query(query)
-            logger.debug(
+            logger.info(
                 f"Finished loading log entries from BigQueryAuditMetadata in {dataset}"
             )
@@ -542,7 +554,7 @@ def is_latest_shard(self, project_id: str, schema: str, table: str) -> bool:
         return True

     def generate_partition_profiler_query(
-        self, schema: str, table: str
+        self, schema: str, table: str, partition_datetime: Optional[datetime.datetime]
     ) -> Tuple[Optional[str], Optional[str]]:
         """
         Method returns partition id if table is partitioned or sharded and generate custom partition query for
@@ -553,12 +565,13 @@ def generate_partition_profiler_query(
         partition = self.get_latest_partition(schema, table)
         if partition:
             partition_ts: Union[datetime.datetime, datetime.date]
-
+            if not partition_datetime:
+                partition_datetime = parser.parse(partition.partition_id)
             logger.debug(f"{table} is partitioned and partition column is {partition}")
             if partition.data_type in ("TIMESTAMP", "DATETIME"):
-                partition_ts = parser.parse(partition.partition_id)
+                partition_ts = partition_datetime
             elif partition.data_type == "DATE":
-                partition_ts = parser.parse(partition.partition_id).date()
+                partition_ts = partition_datetime.date()
             else:
                 logger.warning(f"Not supported partition type {partition.data_type}")
                 return None, None

diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
index efbf6800009f1..ab07b77a037f6 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
@@ -1,3 +1,4 @@
+import datetime
 import logging
 from abc import abstractmethod
 from dataclasses import dataclass, field
@@ -1071,7 +1072,7 @@ def _get_profiler_instance(self, inspector: Inspector) -> "DatahubGEProfiler":

     # Override if needed
     def generate_partition_profiler_query(
-        self, schema: str, table: str
+        self, schema: str, table: str, partition_datetime: Optional[datetime.datetime]
     ) -> Tuple[Optional[str], Optional[str]]:
         return None, None

@@ -1111,7 +1112,7 @@ def loop_profiler_requests(
                 continue

             (partition, custom_sql) = self.generate_partition_profiler_query(
-                schema, table
+                schema, table, self.config.profiling.partition_datetime
             )

             self.report.report_entity_profiled(dataset_name)
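
The behavioural change in `generate_partition_profiler_query` is easiest to see in isolation: an explicit `partition_datetime` now takes precedence, otherwise the timestamp is still derived from the partition id. An illustrative sketch, not the DataHub source, requiring only `python-dateutil`:

```python
import datetime
from typing import Optional

from dateutil import parser


def resolve_partition_ts(
    partition_id: str, partition_datetime: Optional[datetime.datetime]
) -> datetime.datetime:
    # Mirrors the patched logic: an explicit override wins; otherwise the
    # timestamp is parsed from the partition id (e.g. "20220201" -> 2022-02-01).
    return partition_datetime or parser.parse(partition_id)


print(resolve_partition_ts("20220201", None))
# 2022-02-01 00:00:00
print(resolve_partition_ts("20220201", datetime.datetime(2022, 1, 15)))
# 2022-01-15 00:00:00
```
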
From 9d3bc828826e5192ebd176496c07af62334ab2be Mon Sep 17 00:00:00 2001
From: Gabe Lyons
Date: Tue, 8 Feb 2022 11:32:54 -0800
Subject: [PATCH 4/6] make schema tab no longer default for glossary term (#4080)

---
 .../src/app/entity/glossaryTerm/GlossaryTermEntity.tsx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/datahub-web-react/src/app/entity/glossaryTerm/GlossaryTermEntity.tsx b/datahub-web-react/src/app/entity/glossaryTerm/GlossaryTermEntity.tsx
index 8bf6e580ca1cd..fd3e0a7aaf3f8 100644
--- a/datahub-web-react/src/app/entity/glossaryTerm/GlossaryTermEntity.tsx
+++ b/datahub-web-react/src/app/entity/glossaryTerm/GlossaryTermEntity.tsx
@@ -60,6 +60,10 @@ export class GlossaryTermEntity implements Entity {
                 entityType={EntityType.GlossaryTerm}
                 useEntityQuery={useGetGlossaryTermQuery as any}
                 tabs={[
+                    {
+                        name: 'Related Entities',
+                        component: GlossaryRelatedEntity,
+                    },
                     {
                         name: 'Schema',
                         component: SchemaTab,
@@ -73,10 +77,6 @@ export class GlossaryTermEntity implements Entity {
                                 glossaryTerm?.glossaryTerm?.schemaMetadata !== null,
                         },
                     },
-                    {
-                        name: 'Related Entities',
-                        component: GlossaryRelatedEntity,
-                    },
                     {
                         name: 'Related Terms',
                         component: GlossayRelatedTerms,
From f5a51f0a74492ab672c1f623f4761af30fe3c1c3 Mon Sep 17 00:00:00 2001
From: John Joyce
Date: Tue, 8 Feb 2022 12:17:38 -0800
Subject: [PATCH 5/6] fix(ingest): rest-emitter - fix serialization helper conditional (#4090)

hot-fix for rest emission issues in 0.8.25.0 and 0.8.25.1
---
 metadata-ingestion/src/datahub/emitter/serialization_helper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metadata-ingestion/src/datahub/emitter/serialization_helper.py b/metadata-ingestion/src/datahub/emitter/serialization_helper.py
index 54a2e6cd63af6..5a348ce267b10 100644
--- a/metadata-ingestion/src/datahub/emitter/serialization_helper.py
+++ b/metadata-ingestion/src/datahub/emitter/serialization_helper.py
@@ -7,7 +7,7 @@ def _json_transform(obj: Any, from_pattern: str, to_pattern: str) -> Any:
         if len(obj.keys()) == 1:
             key: str = list(obj.keys())[0]
             value = obj[key]
-            if key.startswith(from_pattern) >= 0:
+            if key.startswith(from_pattern):
                 new_key = key.replace(from_pattern, to_pattern, 1)
                 return {new_key: _json_transform(value, from_pattern, to_pattern)}
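
The one-character fix hides a real logic bug: `str.startswith` returns a bool, and both `True >= 0` and `False >= 0` evaluate to true, so the old condition matched every single-key dict rather than only keys with the expected prefix. A quick illustration; the pattern string below is just an example of the kind of prefix this helper rewrites:

```python
# Buggy check: a ">= 0" comparison meant for find() applied to startswith().
print("something.else".startswith("com.linkedin.pegasus2avro.") >= 0)  # True (bug)

# Fixed check: only genuinely matching keys pass.
print("something.else".startswith("com.linkedin.pegasus2avro."))       # False
print("com.linkedin.pegasus2avro.common.Status".startswith("com.linkedin.pegasus2avro."))  # True
```
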
From 306fe0b5ffe3e59857ca5643136c8b29d80d4d60 Mon Sep 17 00:00:00 2001
From: Gabe Lyons
Date: Tue, 8 Feb 2022 12:27:09 -0800
Subject: [PATCH 6/6] fix(terms): fix removing terms from schema field & add cypress tests to cover these flows (#4091)

---
 .../resolvers/mutate/util/LabelUtils.java     |  2 +-
 .../Schema/utils/useTagsAndTermsRenderer.tsx  | 32 ++++++------
 .../src/app/shared/tags/AddTagTermModal.tsx   |  6 ++-
 .../src/app/shared/tags/TagTermGroup.tsx      |  4 +-
 .../integration/mutations/mutations.js        | 45 +++++++++++++++++++
 5 files changed, 70 insertions(+), 19 deletions(-)

diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LabelUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LabelUtils.java
index 69208f0102c72..36776d9eaca0b 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LabelUtils.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LabelUtils.java
@@ -66,7 +66,7 @@ public static void removeTermFromTarget(
       }

       removeTermIfExists(editableFieldInfo.getGlossaryTerms(), labelUrn);
-      persistAspect(targetUrn, GLOSSARY_TERM_ASPECT_NAME, editableSchemaMetadata, actor, entityService);
+      persistAspect(targetUrn, EDITABLE_SCHEMA_METADATA, editableSchemaMetadata, actor, entityService);
     }
   }

diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useTagsAndTermsRenderer.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useTagsAndTermsRenderer.tsx
index f40a3c9ecde0f..c2c46be8a6b98 100644
--- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useTagsAndTermsRenderer.tsx
+++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useTagsAndTermsRenderer.tsx
@@ -19,21 +19,23 @@
         );
         return (
-            setTagHoveredIndex(undefined)} entityUrn={urn} entityType={EntityType.Dataset} entitySubresource={record.fieldPath} refetch={refetch} />
+
+            setTagHoveredIndex(undefined)} entityUrn={urn} entityType={EntityType.Dataset} entitySubresource={record.fieldPath} refetch={refetch} />
+
         );
     };
     return tagAndTermRender;

diff --git a/datahub-web-react/src/app/shared/tags/AddTagTermModal.tsx b/datahub-web-react/src/app/shared/tags/AddTagTermModal.tsx
index f3a80c7d20ade..472d57d9bd5d4 100644
--- a/datahub-web-react/src/app/shared/tags/AddTagTermModal.tsx
+++ b/datahub-web-react/src/app/shared/tags/AddTagTermModal.tsx
@@ -236,7 +236,11 @@ export default function AddTagTermModal({
-

diff --git a/datahub-web-react/src/app/shared/tags/TagTermGroup.tsx b/datahub-web-react/src/app/shared/tags/TagTermGroup.tsx
index 2a22e9681948d..10a083b47570e 100644
--- a/datahub-web-react/src/app/shared/tags/TagTermGroup.tsx
+++ b/datahub-web-react/src/app/shared/tags/TagTermGroup.tsx
@@ -229,7 +229,7 @@ export default function TagTermGroup({
                     {...buttonProps}
                 >
-                    Add Tag
+                    Add Tag
                 )}
             {canAddTerm &&
@@ -243,7 +243,7 @@ export default function TagTermGroup({
                     {...buttonProps}
                 >
-                    Add Term
+                    Add Term
                 )}
             {showAddModal && !!entityUrn && !!entityType && (

diff --git a/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js b/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js
index 4a5b4eb8924c4..a8ba4afc192d4 100644
--- a/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js
+++ b/smoke-test/tests/cypress/cypress/integration/mutations/mutations.js
@@ -37,4 +37,49 @@ describe('mutations', () => {

         cy.deleteUrn('urn:li:tag:CypressTestAddTag')
     });
+
+    it('can add and remove terms from a dataset', () => {
+        cy.login();
+        cy.visit('/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)');
+        cy.contains('cypress_logging_events');
+
+        cy.contains('Add Term').click();
+
+        cy.focused().type('CypressTerm');
+
+        cy.get('.ant-select-item-option-content').within(() => cy.contains('CypressNode.CypressTerm').click({force: true}));
+
+        cy.get('[data-testid="add-tag-term-from-modal-btn"]').click({force: true});
+        cy.get('[data-testid="add-tag-term-from-modal-btn"]').should('not.exist');
+
+        cy.contains('CypressTerm');
+
+        cy.get('a[href="/glossary/urn:li:glossaryTerm:CypressNode.CypressTerm"]').within(() => cy.get('span[aria-label=close]').click());
+        cy.contains('Yes').click();
+
+        cy.contains('CypressTerm').should('not.exist');
+    });
+
+    it('can add and remove terms from a dataset field', () => {
+        cy.login();
+        // make space for the glossary term column
+        cy.viewport(1300, 800)
+        cy.visit('/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)');
+        cy.get('[data-testid="schema-field-event_name-terms"]').trigger('mouseover', {force: true});
+        cy.get('[data-testid="schema-field-event_name-terms"]').within(() => cy.contains('Add Term').click())
+
+        cy.focused().type('CypressTerm');
+
+        cy.get('.ant-select-item-option-content').within(() => cy.contains('CypressNode.CypressTerm').click({force: true}));
+
+        cy.get('[data-testid="add-tag-term-from-modal-btn"]').click({force: true});
+        cy.get('[data-testid="add-tag-term-from-modal-btn"]').should('not.exist');
+
+        cy.contains('CypressTerm');
+
+        cy.get('a[href="/glossary/urn:li:glossaryTerm:CypressNode.CypressTerm"]').within(() => cy.get('span[aria-label=close]').click());
+        cy.contains('Yes').click();
+
+        cy.contains('CypressTerm').should('not.exist');
+    });
 })