Skip to content

Commit

Permalink
Merge branch 'datahub-project:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
anshbansal authored Nov 5, 2024
2 parents 6d27fc8 + 83ec73b commit 1199cc4
Show file tree
Hide file tree
Showing 18 changed files with 118 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,6 @@ public void testMCPBatch() throws IOException {
OP_CONTEXT
.getObjectMapper()
.readTree(
"{\"source\":{\"type\":\"datahub-gc\",\"config\":{\"cleanup_expired_tokens\":false,\"truncate_indices\":true,\"dataprocess_cleanup\":{\"retention_days\":10,\"delete_empty_data_jobs\":true,\"delete_empty_data_flows\":true,\"hard_delete_entities\":false,\"keep_last_n\":5},\"soft_deleted_entities_cleanup\":{\"retention_days\":10}}}}"));
"{\"source\":{\"type\":\"datahub-gc\",\"config\":{\"cleanup_expired_tokens\":false,\"truncate_indices\":true,\"dataprocess_cleanup\":{\"retention_days\":10,\"delete_empty_data_jobs\":true,\"delete_empty_data_flows\":true,\"hard_delete_entities\":false,\"keep_last_n\":5},\"soft_deleted_entities_cleanup\":{\"retention_days\":10},\"execution_request_cleanup\":{\"keep_history_min_count\":10,\"keep_history_max_count\":1000,\"keep_history_max_days\":30,\"batch_read_size\":100,\"enabled\":false}}}}"));
}
}
10 changes: 10 additions & 0 deletions docs-website/adoptionStoriesIndexes.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,16 @@
"category": "Financial & Fintech",
"description": "<i>\"We found DataHub to provide excellent coverage for our needs. What we appreciate most about DataHub is <b>its powerful API platform.</b>\"<br /><br /><div style='color: gray;'>— Jean-Pierre Dijcks, Sr. Dir. Product Management at VISA</div></i><br />"
},
{
"name": "Apple",
"slug": "apple",
"link": "https://youtu.be/5eFZuzG4c-s?feature=shared",
"linkType": "video",
"tagline": "How Apple built a solid foundation for observability, governance, and data sharing with DataHub",
"category": "B2B & B2C",
"platform": "cloud",
"description": "Discover how DataHub provides a solid foundation for observability, governance, and data sharing, while we explore its role in managing AI and data metadata."
},
{
"name": "Notion",
"slug": "notion",
Expand Down
4 changes: 2 additions & 2 deletions docs-website/docusaurus.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ module.exports = {
// },
// }),
announcementBar: {
id: "announcement-2",
id: "announcement-3",
content:
'<div style="display: flex; justify-content: center; align-items: center;width: 100%;"><!--img src="/img/acryl-logo-white-mark.svg" / --><div style="font-size: .8rem; font-weight: 600; background-color: white; color: #111; padding: 0px 8px; border-radius: 4px; margin-right:12px;">NEW</div><p>Join us at Metadata & AI Summit, Oct. 29 & 30!</p><a href="http://www.acryldata.io/conference?utm_source=datahub_web&utm_medium=metadata_ai_2024&utm_campaign=home_banner" target="_blank" class="button">Register<span> →</span></a></div>',
'<div style="display: flex; justify-content: center; align-items: center;width: 100%;"><!--img src="/img/acryl-logo-white-mark.svg" / --><!--div style="font-size: .8rem; font-weight: 600; background-color: white; color: #111; padding: 0px 8px; border-radius: 4px; margin-right:12px;">NEW</div--><p>Watch Metadata & AI Summit sessions on-demand.</p><a href="https://www.youtube.com/@DataHubProject/videos" target="_blank" class="button">Watch Now<span> →</span></a></div>',
backgroundColor: "#111",
textColor: "#ffffff",
isCloseable: false,
Expand Down
1 change: 1 addition & 0 deletions docs-website/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
"markprompt": "^0.1.7",
"react": "^18.2.0",
"react-dom": "18.2.0",
"react-use-draggable-scroll": "^0.4.7",
"sass": "^1.43.2",
"swc-loader": "^0.2.6",
"swiper": "^11.1.4",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@
flex: 1;
}
.cardLink {
user-select: none;
-webkit-user-drag: none;

color: #000;

&:hover {
Expand Down Expand Up @@ -88,6 +91,8 @@
z-index: 10;
filter: brightness(2);
opacity: .9;
user-select: none;
-webkit-user-drag: none;
}
.cardImageBackground {
position: absolute;
Expand Down
10 changes: 10 additions & 0 deletions docs-website/src/pages/_components/CaseStudy/caseStudyContent.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ const caseStudyData = [
image: "https://datahubproject.io/img/logos/companies/netflix.png",
link: "https://datahubproject.io/adoption-stories/#netflix",
},
{
title: "A Solid Foundation For Data and AI",
description:
"How Apple built a solid foundation for observability, governance, and data sharing with DataHub.",
tag: "Technology",
backgroundImage:
"https://upload.wikimedia.org/wikipedia/commons/thumb/5/5a/Aerial_view_of_Apple_Park_dllu.jpg/2560px-Aerial_view_of_Apple_Park_dllu.jpg",
image: "/img/logos/companies/apple_text.png",
link: "https://datahubproject.io/adoption-stories/#apple",
},
{
title: "Scaling Data Governance",
description:
Expand Down
16 changes: 10 additions & 6 deletions docs-website/src/pages/_components/CaseStudy/index.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import React from "react";
import React, { useRef } from "react";
import styles from "./case-study.module.scss";
import Link from '@docusaurus/Link'
import { Carousel } from "antd";
import { useDraggable } from "react-use-draggable-scroll";
import caseStudyData from "./caseStudyContent";


const CaseStudy = () => {
const containerRef = useRef(null);

const { events } = useDraggable(containerRef);

return (
<div className={styles.container}>
{/* Section-1 */}
Expand All @@ -15,16 +19,16 @@ const CaseStudy = () => {
<p>Across finance, healthcare, e-commerce, and countless more.</p>
</div>

<div className={styles.card_row}>
<div className={styles.card_row_wrapper} >
<div className={styles.card_row} {...events} ref={containerRef}>
<div className={styles.card_row_wrapper}>
{caseStudyData.map((caseStudy) => (
<div className={styles.card} key={caseStudy.link}>
<a className={styles.cardLink} href={caseStudy.link} style={caseStudy.backgroundImage ? null : {
<a draggable={false} className={styles.cardLink} href={caseStudy.link} style={caseStudy.backgroundImage ? null : {
opacity: .5
}}>
{caseStudy.tag ? <span className={styles.card_tag}>{caseStudy.tag}</span> : null}
<div className={styles.card_image} style={{ backgroundColor: caseStudy.backgroundImage ? null : '#eee' }}>
<img src={caseStudy.image} alt="" />
<img src={caseStudy.image} draggable={false} alt="" />
<div className={styles.cardImageBackground} style={{ backgroundImage: `url(${caseStudy.backgroundImage})` }} />
</div>
<div className={styles.card_heading_div}>
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
15 changes: 6 additions & 9 deletions docs-website/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1827,7 +1827,7 @@
"@docusaurus/theme-search-algolia" "2.4.3"
"@docusaurus/types" "2.4.3"

"@docusaurus/[email protected]":
"@docusaurus/[email protected]", "react-loadable@npm:@docusaurus/[email protected]":
version "5.5.2"
resolved "https://registry.yarnpkg.com/@docusaurus/react-loadable/-/react-loadable-5.5.2.tgz#81aae0db81ecafbdaee3651f12804580868fa6ce"
integrity sha512-A3dYjdBGuy0IGT+wyLIGIKLRE+sAk1iNk0f1HjNDysO7u8lhL4N3VEm+FAubmJbAztn94F7MxBTPmnixbiyFdQ==
Expand Down Expand Up @@ -9713,14 +9713,6 @@ react-loadable-ssr-addon-v5-slorber@^1.0.1:
dependencies:
"@babel/runtime" "^7.10.3"

"react-loadable@npm:@docusaurus/[email protected]":
version "5.5.2"
resolved "https://registry.yarnpkg.com/@docusaurus/react-loadable/-/react-loadable-5.5.2.tgz#81aae0db81ecafbdaee3651f12804580868fa6ce"
integrity sha512-A3dYjdBGuy0IGT+wyLIGIKLRE+sAk1iNk0f1HjNDysO7u8lhL4N3VEm+FAubmJbAztn94F7MxBTPmnixbiyFdQ==
dependencies:
"@types/react" "*"
prop-types "^15.6.2"

react-markdown@^8.0.6:
version "8.0.7"
resolved "https://registry.yarnpkg.com/react-markdown/-/react-markdown-8.0.7.tgz#c8dbd1b9ba5f1c5e7e5f2a44de465a3caafdf89b"
Expand Down Expand Up @@ -9835,6 +9827,11 @@ react-textarea-autosize@^8.3.2:
use-composed-ref "^1.3.0"
use-latest "^1.2.1"

react-use-draggable-scroll@^0.4.7:
version "0.4.7"
resolved "https://registry.yarnpkg.com/react-use-draggable-scroll/-/react-use-draggable-scroll-0.4.7.tgz#86e77caab921ca07b134e9e1d1bc1810aeee4916"
integrity sha512-6gCxGPO9WV5dIsBaDrgUKBaac8CY07PkygcArfajijYSNDwAq0girDRjaBuF1+lRqQryoLFQfpVaV2u/Yh6CrQ==

react-waypoint@^10.3.0:
version "10.3.0"
resolved "https://registry.yarnpkg.com/react-waypoint/-/react-waypoint-10.3.0.tgz#fcc60e86c6c9ad2174fa58d066dc6ae54e3df71d"
Expand Down
11 changes: 11 additions & 0 deletions metadata-ingestion/src/datahub/cli/delete_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,17 @@ def references(urn: str, dry_run: bool, force: bool) -> None:
logger.info(f"Deleted {references_count} references to {urn}")


@delete.command()
@click.option(
    "--urn",
    required=True,
    type=str,
    help="the urn of the entity",
)
def undo_by_filter(urn: str) -> None:
    """
    Undo a soft deletion of an entity
    """
    # NOTE: the docstring above doubles as the command's --help text, so its
    # wording is user-facing.
    client = get_default_graph()
    logger.info(f"Using {client}")
    # delete=False clears the soft-delete flag instead of setting it.
    client.set_soft_delete_status(delete=False, urn=urn)


@delete.command(no_args_is_help=True)
@click.option(
"--urn",
Expand Down
17 changes: 16 additions & 1 deletion metadata-ingestion/src/datahub/ingestion/graph/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1241,14 +1241,29 @@ def soft_delete_entity(
Args:
urn: The urn of the entity to soft-delete.
"""
self.set_soft_delete_status(
urn=urn, run_id=run_id, deletion_timestamp=deletion_timestamp, delete=True
)

def set_soft_delete_status(
self,
urn: str,
delete: bool,
run_id: str = _GRAPH_DUMMY_RUN_ID,
deletion_timestamp: Optional[int] = None,
) -> None:
"""Set the soft-delete status of an entity by urn.
Args:
urn: The urn of the entity whose soft-delete status is being changed.
delete: True to soft-delete the entity, False to undo a soft deletion.
"""
assert urn

deletion_timestamp = deletion_timestamp or int(time.time() * 1000)
self.emit(
MetadataChangeProposalWrapper(
entityUrn=urn,
aspect=StatusClass(removed=True),
aspect=StatusClass(removed=delete),
systemMetadata=SystemMetadataClass(
runId=run_id, lastObserved=deletion_timestamp
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def __init__(
self.context = context

self.database = database
self.known_urns: Set[str] = set() # will be set later

self.aggregator = SqlParsingAggregator(
platform=self.platform,
Expand All @@ -68,6 +69,7 @@ def __init__(
generate_operations=False,
usage_config=self.config,
graph=self.context.graph,
is_temp_table=self._is_temp_table,
)
self.report.sql_aggregator = self.aggregator.report

Expand All @@ -87,7 +89,16 @@ def __init__(
self.report.lineage_end_time,
) = self._lineage_v1.get_time_window()

self.known_urns: Set[str] = set() # will be set later
def _is_temp_table(self, name: str) -> bool:
    """Report whether ``name`` should be treated as a temp table.

    A table counts as temporary when the dataset urn built from this
    builder's platform, env and platform instance is not present in
    ``self.known_urns``.

    Args:
        name: Table name to build the dataset urn from.

    Returns:
        True if the resulting urn is not in ``self.known_urns``.
    """
    dataset_urn = DatasetUrn.create_from_ids(
        self.platform,
        name,
        env=self.config.env,
        platform_instance=self.config.platform_instance,
    ).urn()
    return dataset_urn not in self.known_urns

def build(
self,
Expand All @@ -107,15 +118,6 @@ def build(
for schema, tables in schemas.items()
for table in tables
}
self.aggregator._is_temp_table = (
lambda name: DatasetUrn.create_from_ids(
self.platform,
name,
env=self.config.env,
platform_instance=self.config.platform_instance,
).urn()
not in self.known_urns
)

# Handle all the temp tables up front.
if self.config.resolve_temp_table_in_lineage:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -537,8 +537,13 @@ def _maybe_format_query(self, query: str) -> str:
return query

@functools.lru_cache(maxsize=128)
def _name_from_urn(self, urn: UrnStr) -> str:
name = DatasetUrn.from_string(urn).name
def _name_from_urn(self, urn: UrnStr) -> Optional[str]:
urn_obj = DatasetUrn.from_string(urn)
if urn_obj.platform != self.platform.urn():
# If this is external (e.g. s3), we don't know the name.
return None

name = urn_obj.name
if (
platform_instance := self._schema_resolver.platform_instance
) and name.startswith(platform_instance):
Expand All @@ -549,14 +554,22 @@ def _name_from_urn(self, urn: UrnStr) -> str:
def is_temp_table(self, urn: UrnStr) -> bool:
if self._is_temp_table is None:
return False
return self._is_temp_table(self._name_from_urn(urn))
name = self._name_from_urn(urn)
if name is None:
# External tables are not temp tables.
return False
return self._is_temp_table(name)

def is_allowed_table(self, urn: UrnStr) -> bool:
def is_allowed_table(self, urn: UrnStr, allow_external: bool = True) -> bool:
if self.is_temp_table(urn):
return False
if self._is_allowed_table is None:
return True
return self._is_allowed_table(self._name_from_urn(urn))
name = self._name_from_urn(urn)
if name is None:
# Treat external tables specially.
return allow_external
return self._is_allowed_table(name)

def add(
self,
Expand Down Expand Up @@ -852,7 +865,7 @@ def add_preparsed_query(
upstream_fields = parsed.column_usage or {}
for upstream_urn in parsed.upstreams:
# If the upstream table is a temp table or otherwise denied by filters, don't log usage for it.
if not self.is_allowed_table(upstream_urn) or (
if not self.is_allowed_table(upstream_urn, allow_external=False) or (
require_out_table_schema
and not self._schema_resolver.has_urn(upstream_urn)
):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"com.linkedin.pegasus2avro.common.InstitutionalMemory": {
"elements": [
{
"url": "https://raw.githubusercontent.com/OAI/OpenAPI-Specification/main/examples/",
"url": "https://raw.githubusercontent.com/OAI/OpenAPI-Specification/main/tests/",
"description": "Link to call for the dataset.",
"createStamp": {
"time": 1586847600,
Expand Down Expand Up @@ -71,7 +71,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "openapi-2020_04_14-07_00_00"
"runId": "openapi-2020_04_14-07_00_00-m1k7d5",
"lastRunId": "no-run-id-provided"
}
},
{
Expand All @@ -95,7 +96,7 @@
"com.linkedin.pegasus2avro.common.InstitutionalMemory": {
"elements": [
{
"url": "https://raw.githubusercontent.com/OAI/OpenAPI-Specification/main/examples/v2",
"url": "https://raw.githubusercontent.com/OAI/OpenAPI-Specification/main/tests/v2",
"description": "Link to call for the dataset.",
"createStamp": {
"time": 1586847600,
Expand Down Expand Up @@ -146,7 +147,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "openapi-2020_04_14-07_00_00"
"runId": "openapi-2020_04_14-07_00_00-m1k7d5",
"lastRunId": "no-run-id-provided"
}
},
{
Expand All @@ -161,7 +163,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "openapi-2020_04_14-07_00_00"
"runId": "openapi-2020_04_14-07_00_00-m1k7d5",
"lastRunId": "no-run-id-provided"
}
},
{
Expand All @@ -176,7 +179,8 @@
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "openapi-2020_04_14-07_00_00"
"runId": "openapi-2020_04_14-07_00_00-m1k7d5",
"lastRunId": "no-run-id-provided"
}
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ source:
type: openapi
config:
name: test_openapi
url: https://raw.githubusercontent.com/OAI/OpenAPI-Specification/main/examples/
swagger_file: v3.0/api-with-examples.yaml
url: https://raw.githubusercontent.com/OAI/OpenAPI-Specification/main/tests/
swagger_file: v3.0/pass/api-with-examples.yaml


sink:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ bootstrap:

# Ingestion Recipes
- name: ingestion-datahub-gc
version: v2
optional: true
version: v3
optional: false
mcps_location: "bootstrap_mcps/ingestion-datahub-gc.yaml"
values_env: "DATAHUB_GC_BOOTSTRAP_VALUES"
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
keep_history_max_days: {{execution_request_cleanup.keep_history_max_days}}{{^execution_request_cleanup.keep_history_max_days}}30{{/execution_request_cleanup.keep_history_max_days}}
batch_read_size: {{execution_request_cleanup.batch_read_size}}{{^execution_request_cleanup.batch_read_size}}100{{/execution_request_cleanup.batch_read_size}}
enabled: {{execution_request_cleanup.enabled}}{{^execution_request_cleanup.enabled}}false{{/execution_request_cleanup.enabled}}
extraArgs: {}
extraArgs: {}
debugMode: false
executorId: default
source:
Expand Down

0 comments on commit 1199cc4

Please sign in to comment.