Skip to content

Commit

Permalink
Merge branch 'master' into bug-abs-jsonl
Browse files Browse the repository at this point in the history
  • Loading branch information
acrylJonny authored Nov 6, 2024
2 parents e5e428e + d5d4810 commit 32d6d3e
Show file tree
Hide file tree
Showing 88 changed files with 13,047 additions and 888 deletions.
13 changes: 8 additions & 5 deletions datahub-frontend/app/auth/AuthModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import com.google.inject.Provides;
import com.google.inject.Singleton;
import com.google.inject.name.Named;
import com.linkedin.entity.client.EntityClientConfig;
import com.linkedin.entity.client.SystemEntityClient;
import com.linkedin.entity.client.SystemRestliEntityClient;
import com.linkedin.metadata.models.registry.EmptyEntityRegistry;
Expand Down Expand Up @@ -213,11 +214,13 @@ protected SystemEntityClient provideEntityClient(

return new SystemRestliEntityClient(
buildRestliClient(),
new ExponentialBackoff(configs.getInt(ENTITY_CLIENT_RETRY_INTERVAL)),
configs.getInt(ENTITY_CLIENT_NUM_RETRIES),
configurationProvider.getCache().getClient().getEntityClient(),
Math.max(1, configs.getInt(ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE)),
Math.max(1, configs.getInt(ENTITY_CLIENT_RESTLI_GET_BATCH_CONCURRENCY)));
EntityClientConfig.builder()
.backoffPolicy(new ExponentialBackoff(configs.getInt(ENTITY_CLIENT_RETRY_INTERVAL)))
.retryCount(configs.getInt(ENTITY_CLIENT_NUM_RETRIES))
.batchGetV2Size(configs.getInt(ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE))
.batchGetV2Concurrency(2)
.build(),
configurationProvider.getCache().getClient().getEntityClient());
}

@Provides
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.util.Map;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.springframework.core.io.ClassPathResource;
Expand Down Expand Up @@ -109,13 +110,29 @@ static List<ObjectNode> resolveMCPTemplate(
AuditStamp auditStamp)
throws IOException {

String template = loadTemplate(mcpTemplate.getMcps_location());
Mustache mustache = MUSTACHE_FACTORY.compile(new StringReader(template), mcpTemplate.getName());
final String template = loadTemplate(mcpTemplate.getMcps_location());
Map<String, Object> scopeValues = resolveValues(opContext, mcpTemplate, auditStamp);

StringWriter writer = new StringWriter();
mustache.execute(writer, scopeValues);
try {
Mustache mustache =
MUSTACHE_FACTORY.compile(new StringReader(template), mcpTemplate.getName());
mustache.execute(writer, scopeValues);
} catch (Exception e) {
log.error(
"Failed to apply mustache template. Template: {} Values: {}",
template,
resolveEnv(mcpTemplate));
throw e;
}

return opContext.getYamlMapper().readValue(writer.toString(), new TypeReference<>() {});
final String yaml = writer.toString();
try {
return opContext.getYamlMapper().readValue(yaml, new TypeReference<>() {});
} catch (Exception e) {
log.error("Failed to parse rendered MCP bootstrap yaml: {}", yaml);
throw e;
}
}

static Map<String, Object> resolveValues(
Expand All @@ -128,13 +145,21 @@ static Map<String, Object> resolveValues(
// built-in
scopeValues.put("auditStamp", RecordUtils.toJsonString(auditStamp));

String envValue = resolveEnv(mcpTemplate);
if (envValue != null) {
scopeValues.putAll(opContext.getObjectMapper().readValue(envValue, new TypeReference<>() {}));
}
return scopeValues;
}

@Nullable
private static String resolveEnv(BootstrapMCPConfigFile.MCPTemplate mcpTemplate) {
if (mcpTemplate.getValues_env() != null
&& !mcpTemplate.getValues_env().isEmpty()
&& System.getenv().containsKey(mcpTemplate.getValues_env())) {
String envValue = System.getenv(mcpTemplate.getValues_env());
scopeValues.putAll(opContext.getObjectMapper().readValue(envValue, new TypeReference<>() {}));
return System.getenv(mcpTemplate.getValues_env());
}
return scopeValues;
return null;
}

private static String loadTemplate(String source) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,6 @@ public void testMCPBatch() throws IOException {
OP_CONTEXT
.getObjectMapper()
.readTree(
"{\"source\":{\"type\":\"datahub-gc\",\"config\":{\"cleanup_expired_tokens\":false,\"truncate_indices\":true,\"dataprocess_cleanup\":{\"retention_days\":10,\"delete_empty_data_jobs\":true,\"delete_empty_data_flows\":true,\"hard_delete_entities\":false,\"keep_last_n\":5},\"soft_deleted_entities_cleanup\":{\"retention_days\":10}}}}"));
"{\"source\":{\"type\":\"datahub-gc\",\"config\":{\"cleanup_expired_tokens\":false,\"truncate_indices\":true,\"dataprocess_cleanup\":{\"retention_days\":10,\"delete_empty_data_jobs\":true,\"delete_empty_data_flows\":true,\"hard_delete_entities\":false,\"keep_last_n\":5},\"soft_deleted_entities_cleanup\":{\"retention_days\":10},\"execution_request_cleanup\":{\"keep_history_min_count\":10,\"keep_history_max_count\":1000,\"keep_history_max_days\":30,\"batch_read_size\":100,\"enabled\":false}}}}"));
}
}
4 changes: 4 additions & 0 deletions datahub-web-react/src/app/ingest/source/builder/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import clickhouseLogo from '../../../../images/clickhouselogo.png';
import cockroachdbLogo from '../../../../images/cockroachdblogo.png';
import trinoLogo from '../../../../images/trinologo.png';
import dbtLogo from '../../../../images/dbtlogo.png';
import dremioLogo from '../../../../images/dremiologo.png';
import druidLogo from '../../../../images/druidlogo.png';
import elasticsearchLogo from '../../../../images/elasticsearchlogo.png';
import feastLogo from '../../../../images/feastlogo.png';
Expand Down Expand Up @@ -52,6 +53,8 @@ export const COCKROACHDB = 'cockroachdb';
export const COCKROACHDB_URN = `urn:li:dataPlatform:${COCKROACHDB}`;
export const DBT = 'dbt';
export const DBT_URN = `urn:li:dataPlatform:${DBT}`;
export const DREMIO = 'dremio';
export const DREMIO_URN = `urn:li:dataPlatform:${DREMIO}`;
export const DRUID = 'druid';
export const DRUID_URN = `urn:li:dataPlatform:${DRUID}`;
export const DYNAMODB = 'dynamodb';
Expand Down Expand Up @@ -139,6 +142,7 @@ export const PLATFORM_URN_TO_LOGO = {
[CLICKHOUSE_URN]: clickhouseLogo,
[COCKROACHDB_URN]: cockroachdbLogo,
[DBT_URN]: dbtLogo,
[DREMIO_URN]: dremioLogo,
[DRUID_URN]: druidLogo,
[DYNAMODB_URN]: dynamodbLogo,
[ELASTICSEARCH_URN]: elasticsearchLogo,
Expand Down
8 changes: 8 additions & 0 deletions datahub-web-react/src/app/ingest/source/builder/sources.json
Original file line number Diff line number Diff line change
Expand Up @@ -302,5 +302,13 @@
"description": "Configure a custom recipe using YAML.",
"docsUrl": "https://datahubproject.io/docs/metadata-ingestion/",
"recipe": "source:\n type: <source-type>\n config:\n # Source-type specifics config\n <source-configs>"
},
{
"urn": "urn:li:dataPlatform:dremio",
"name": "dremio",
"displayName": "Dremio",
"description": "Import Spaces, Sources, Tables and statistics from Dremio.",
"docsUrl": "https://datahubproject.io/docs/metadata-ingestion/",
"recipe": "source:\n type: dremio\n config:\n # Coordinates\n hostname: null\n port: null\n #true if https, otherwise false\n tls: true\n\n #For cloud instance\n #is_dremio_cloud: True\n #dremio_cloud_project_id: <project_id>\n\n #Credentials with personal access token\n authentication_method: PAT\n password: pass\n\n #Or Credentials with basic auth\n #authentication_method: password\n #username: null\n #password: null\n\n stateful_ingestion:\n enabled: true"
}
]
Binary file added datahub-web-react/src/images/dremiologo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 10 additions & 0 deletions docs-website/adoptionStoriesIndexes.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,16 @@
"category": "Financial & Fintech",
"description": "<i>\"We found DataHub to provide excellent coverage for our needs. What we appreciate most about DataHub is <b>its powerful API platform.</b>\"<br /><br /><div style='color: gray;'>— Jean-Pierre Dijcks, Sr. Dir. Product Management at VISA</div></i><br />"
},
{
"name": "Apple",
"slug": "apple",
"link": "https://youtu.be/5eFZuzG4c-s?feature=shared",
"linkType": "video",
"tagline": "How Apple built a solid foundation for observability, governance, and data sharing with DataHub",
"category": "B2B & B2C",
"platform": "cloud",
"description": "Discover how DataHub provides a solid foundation for observability, governance, and data sharing, while we explore its role in managing AI and data metadata."
},
{
"name": "Notion",
"slug": "notion",
Expand Down
4 changes: 2 additions & 2 deletions docs-website/docusaurus.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ module.exports = {
// },
// }),
announcementBar: {
id: "announcement-2",
id: "announcement-3",
content:
'<div style="display: flex; justify-content: center; align-items: center;width: 100%;"><!--img src="/img/acryl-logo-white-mark.svg" / --><div style="font-size: .8rem; font-weight: 600; background-color: white; color: #111; padding: 0px 8px; border-radius: 4px; margin-right:12px;">NEW</div><p>Join us at Metadata & AI Summit, Oct. 29 & 30!</p><a href="http://www.acryldata.io/conference?utm_source=datahub_web&utm_medium=metadata_ai_2024&utm_campaign=home_banner" target="_blank" class="button">Register<span> →</span></a></div>',
'<div style="display: flex; justify-content: center; align-items: center;width: 100%;"><!--img src="/img/acryl-logo-white-mark.svg" / --><!--div style="font-size: .8rem; font-weight: 600; background-color: white; color: #111; padding: 0px 8px; border-radius: 4px; margin-right:12px;">NEW</div--><p>Watch Metadata & AI Summit sessions on-demand.</p><a href="https://www.youtube.com/@DataHubProject/videos" target="_blank" class="button">Watch Now<span> →</span></a></div>',
backgroundColor: "#111",
textColor: "#ffffff",
isCloseable: false,
Expand Down
2 changes: 2 additions & 0 deletions docs-website/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
"@octokit/plugin-throttling": "^3.5.1",
"@octokit/rest": "^18.6.2",
"@radix-ui/react-visually-hidden": "^1.0.2",
"@servicebell/widget": "^0.1.6",
"@supabase/supabase-js": "^2.33.1",
"@swc/core": "^1.4.2",
"antd": "^5.0.7",
Expand All @@ -47,6 +48,7 @@
"markprompt": "^0.1.7",
"react": "^18.2.0",
"react-dom": "18.2.0",
"react-use-draggable-scroll": "^0.4.7",
"sass": "^1.43.2",
"swc-loader": "^0.2.6",
"swiper": "^11.1.4",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@
flex: 1;
}
.cardLink {
user-select: none;
-webkit-user-drag: none;

color: #000;

&:hover {
Expand Down Expand Up @@ -88,6 +91,8 @@
z-index: 10;
filter: brightness(2);
opacity: .9;
user-select: none;
-webkit-user-drag: none;
}
.cardImageBackground {
position: absolute;
Expand Down
10 changes: 10 additions & 0 deletions docs-website/src/pages/_components/CaseStudy/caseStudyContent.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ const caseStudyData = [
image: "https://datahubproject.io/img/logos/companies/netflix.png",
link: "https://datahubproject.io/adoption-stories/#netflix",
},
{
title: "A Solid Foundation For Data and AI",
description:
"How Apple built a solid foundation for observability, governance, and data sharing with DataHub.",
tag: "Technology",
backgroundImage:
"https://upload.wikimedia.org/wikipedia/commons/thumb/5/5a/Aerial_view_of_Apple_Park_dllu.jpg/2560px-Aerial_view_of_Apple_Park_dllu.jpg",
image: "/img/logos/companies/apple_text.png",
link: "https://datahubproject.io/adoption-stories/#apple",
},
{
title: "Scaling Data Governance",
description:
Expand Down
16 changes: 10 additions & 6 deletions docs-website/src/pages/_components/CaseStudy/index.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import React from "react";
import React, { useRef } from "react";
import styles from "./case-study.module.scss";
import Link from '@docusaurus/Link'
import { Carousel } from "antd";
import { useDraggable } from "react-use-draggable-scroll";
import caseStudyData from "./caseStudyContent";


const CaseStudy = () => {
const containerRef = useRef(null);

const { events } = useDraggable(containerRef);

return (
<div className={styles.container}>
{/* Section-1 */}
Expand All @@ -15,16 +19,16 @@ const CaseStudy = () => {
<p>Across finance, healthcare, e-commerce, and countless more.</p>
</div>

<div className={styles.card_row}>
<div className={styles.card_row_wrapper} >
<div className={styles.card_row} {...events} ref={containerRef}>
<div className={styles.card_row_wrapper}>
{caseStudyData.map((caseStudy) => (
<div className={styles.card} key={caseStudy.link}>
<a className={styles.cardLink} href={caseStudy.link} style={caseStudy.backgroundImage ? null : {
<a draggable={false} className={styles.cardLink} href={caseStudy.link} style={caseStudy.backgroundImage ? null : {
opacity: .5
}}>
{caseStudy.tag ? <span className={styles.card_tag}>{caseStudy.tag}</span> : null}
<div className={styles.card_image} style={{ backgroundColor: caseStudy.backgroundImage ? null : '#eee' }}>
<img src={caseStudy.image} alt="" />
<img src={caseStudy.image} draggable={false} alt="" />
<div className={styles.cardImageBackground} style={{ backgroundImage: `url(${caseStudy.backgroundImage})` }} />
</div>
<div className={styles.card_heading_div}>
Expand Down
9 changes: 8 additions & 1 deletion docs-website/src/pages/cloud/index.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import React, { useState } from "react";
import React, { useEffect, useState } from "react";
import Layout from "@theme/Layout";
import Link from "@docusaurus/Link";
import useDocusaurusContext from "@docusaurus/useDocusaurusContext";
Expand All @@ -10,8 +10,11 @@ import UnifiedTabs from "./UnifiedTabs";
import FeatureCards from "./FeatureCards";
import Hero from "./Hero";
import DemoForm from "./DemoForm";
import ServiceBell from "@servicebell/widget";
import DemoFormModal from "./DemoFormModal";

const SERVICE_BELL_ID = "00892146e5bc46d98d55ecc2b2fa67e2";

function Home() {
const context = useDocusaurusContext();
const { siteConfig = {} } = context;
Expand All @@ -24,6 +27,10 @@ function Home() {
window.location.replace("/docs");
}

useEffect(() => {
ServiceBell("init", SERVICE_BELL_ID, { hidden: false });
}, []);

return !siteConfig.customFields.isSaas ? (
<Layout
title={'DataHub Cloud - Unify Data Observability, Governance and Discovery'}
Expand Down
6 changes: 5 additions & 1 deletion docs-website/src/theme/Footer/index.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import React from "react";
import { useLocation } from "react-router-dom"; // Import useLocation from react-router-dom
import Footer from "@theme-original/Footer";
import MarkpromptHelp from "../../components/MarkpromptHelp";

export default function FooterWrapper(props) {
const location = useLocation(); // Get the current location
const isDocsPage = location.pathname.startsWith("/docs"); // Check if the path starts with /docs

return (
<>
<MarkpromptHelp />
{isDocsPage && <MarkpromptHelp />}
<Footer {...props} />
</>
);
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
20 changes: 11 additions & 9 deletions docs-website/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1827,7 +1827,7 @@
"@docusaurus/theme-search-algolia" "2.4.3"
"@docusaurus/types" "2.4.3"

"@docusaurus/react-loadable@5.5.2":
"@docusaurus/react-loadable@5.5.2", "react-loadable@npm:@docusaurus/react-loadable@5.5.2":
version "5.5.2"
resolved "https://registry.yarnpkg.com/@docusaurus/react-loadable/-/react-loadable-5.5.2.tgz#81aae0db81ecafbdaee3651f12804580868fa6ce"
integrity sha512-A3dYjdBGuy0IGT+wyLIGIKLRE+sAk1iNk0f1HjNDysO7u8lhL4N3VEm+FAubmJbAztn94F7MxBTPmnixbiyFdQ==
Expand Down Expand Up @@ -2843,6 +2843,11 @@
rc-resize-observer "^1.3.1"
rc-util "^5.38.0"

"@servicebell/widget@^0.1.6":
version "0.1.6"
resolved "https://registry.yarnpkg.com/@servicebell/widget/-/widget-0.1.6.tgz#04672a7e7b14ff7025ec83fd740373345c359d74"
integrity sha512-Kh21FAETJlk32MRXHGYKxFTWLbbLlgLX3lKYH/02KHaQGIPRdHh/Ok7DiRc+/6f/OTQq3x3ady7e4o9weV5yQg==

"@sideway/address@^4.1.5":
version "4.1.5"
resolved "https://registry.yarnpkg.com/@sideway/address/-/address-4.1.5.tgz#4bc149a0076623ced99ca8208ba780d65a99b9d5"
Expand Down Expand Up @@ -9708,14 +9713,6 @@ react-loadable-ssr-addon-v5-slorber@^1.0.1:
dependencies:
"@babel/runtime" "^7.10.3"

"react-loadable@npm:@docusaurus/react-loadable@5.5.2":
version "5.5.2"
resolved "https://registry.yarnpkg.com/@docusaurus/react-loadable/-/react-loadable-5.5.2.tgz#81aae0db81ecafbdaee3651f12804580868fa6ce"
integrity sha512-A3dYjdBGuy0IGT+wyLIGIKLRE+sAk1iNk0f1HjNDysO7u8lhL4N3VEm+FAubmJbAztn94F7MxBTPmnixbiyFdQ==
dependencies:
"@types/react" "*"
prop-types "^15.6.2"

react-markdown@^8.0.6:
version "8.0.7"
resolved "https://registry.yarnpkg.com/react-markdown/-/react-markdown-8.0.7.tgz#c8dbd1b9ba5f1c5e7e5f2a44de465a3caafdf89b"
Expand Down Expand Up @@ -9830,6 +9827,11 @@ react-textarea-autosize@^8.3.2:
use-composed-ref "^1.3.0"
use-latest "^1.2.1"

react-use-draggable-scroll@^0.4.7:
version "0.4.7"
resolved "https://registry.yarnpkg.com/react-use-draggable-scroll/-/react-use-draggable-scroll-0.4.7.tgz#86e77caab921ca07b134e9e1d1bc1810aeee4916"
integrity sha512-6gCxGPO9WV5dIsBaDrgUKBaac8CY07PkygcArfajijYSNDwAq0girDRjaBuF1+lRqQryoLFQfpVaV2u/Yh6CrQ==

react-waypoint@^10.3.0:
version "10.3.0"
resolved "https://registry.yarnpkg.com/react-waypoint/-/react-waypoint-10.3.0.tgz#fcc60e86c6c9ad2174fa58d066dc6ae54e3df71d"
Expand Down
1 change: 1 addition & 0 deletions docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,7 @@ Please see our [Integrations page](https://datahubproject.io/integrations) if yo
| [datahub-lineage-file](./generated/ingestion/sources/file-based-lineage.md) | _no additional dependencies_ | Lineage File source |
| [datahub-business-glossary](./generated/ingestion/sources/business-glossary.md) | _no additional dependencies_ | Business Glossary File source |
| [dbt](./generated/ingestion/sources/dbt.md) | _no additional dependencies_ | dbt source |
| [dremio](./generated/ingestion/sources/dremio.md) | `pip install 'acryl-datahub[dremio]'` | Dremio Source |
| [druid](./generated/ingestion/sources/druid.md) | `pip install 'acryl-datahub[druid]'` | Druid Source |
| [feast](./generated/ingestion/sources/feast.md) | `pip install 'acryl-datahub[feast]'` | Feast source (0.26.0) |
| [glue](./generated/ingestion/sources/glue.md) | `pip install 'acryl-datahub[glue]'` | AWS Glue source |
Expand Down
1 change: 1 addition & 0 deletions docs/how/updating-datahub.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
`datahub delete --platform powerbi --soft` and then re-ingest with the latest CLI version, ensuring the `include_workspace_name_in_dataset_urn` configuration is set to true.

- #11701: The Fivetran `sources_to_database` field is deprecated in favor of setting directly within `sources_to_platform_instance.<key>.database`.
- #11742: For PowerBI ingestion, `use_powerbi_email` is now enabled by default when extracting ownership information.

### Breaking Changes

Expand Down
2 changes: 2 additions & 0 deletions li-utils/src/main/java/com/linkedin/metadata/Constants.java
Original file line number Diff line number Diff line change
Expand Up @@ -464,5 +464,7 @@ public class Constants {
public static final String MDC_ENTITY_TYPE = "entityType";
public static final String MDC_CHANGE_TYPE = "changeType";

public static final String RESTLI_SUCCESS = "success";

private Constants() {}
}
11 changes: 11 additions & 0 deletions metadata-ingestion/docs/sources/dremio/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
### Concept Mapping

The following table shows how entities and concepts in Dremio map to their corresponding entities in DataHub:

| Source Concept | DataHub Concept | Notes |
| -------------------------- | --------------- | ---------------------------------------------------------- |
| **Physical Dataset/Table** | `Dataset` | Subtype: `Table` |
| **Virtual Dataset/Views** | `Dataset` | Subtype: `View` |
| **Spaces** | `Container` | Mapped to DataHub’s `Container` aspect. Subtype: `Space` |
| **Folders** | `Container` | Mapped as a `Container` in DataHub. Subtype: `Folder` |
| **Sources** | `Container` | Represented as a `Container` in DataHub. Subtype: `Source` |
Loading

0 comments on commit 32d6d3e

Please sign in to comment.