diff --git a/README.md b/README.md index 951dcebad6498..79f85433fbc18 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ export const Logo = (props) => {
DataHub Logo
@@ -18,7 +18,7 @@ export const Logo = (props) => {

-DataHub +DataHub

@@ -156,7 +156,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to - [DataHub Blog](https://blog.datahubproject.io/) - [DataHub YouTube Channel](https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w) -- [Optum: Data Mesh via DataHub](https://optum.github.io/blog/2022/03/23/data-mesh-via-datahub/) +- [Optum: Data Mesh via DataHub](https://opensource.optum.com/blog/2022/03/23/data-mesh-via-datahub) - [Saxo Bank: Enabling Data Discovery in Data Mesh](https://medium.com/datahub-project/enabling-data-discovery-in-a-data-mesh-the-saxo-journey-451b06969c8f) - [Bringing The Power Of The DataHub Real-Time Metadata Graph To Everyone At Acryl Data](https://www.dataengineeringpodcast.com/acryl-data-datahub-metadata-graph-episode-230/) - [DataHub: Popular Metadata Architectures Explained](https://engineering.linkedin.com/blog/2020/datahub-popular-metadata-architectures-explained) diff --git a/docs-website/build.gradle b/docs-website/build.gradle index 370ae3eec9176..a213ec1ae8194 100644 --- a/docs-website/build.gradle +++ b/docs-website/build.gradle @@ -89,7 +89,7 @@ task fastReload(type: YarnTask) { args = ['run', 'generate-rsync'] } -task yarnLint(type: YarnTask, dependsOn: [yarnInstall]) { +task yarnLint(type: YarnTask, dependsOn: [yarnInstall, yarnGenerate]) { inputs.files(projectMdFiles) args = ['run', 'lint-check'] outputs.dir("dist") diff --git a/docs-website/markdown-link-check-config.json b/docs-website/markdown-link-check-config.json new file mode 100644 index 0000000000000..26e040edde6f7 --- /dev/null +++ b/docs-website/markdown-link-check-config.json @@ -0,0 +1,50 @@ +{ + "ignorePatterns": [ + { + "pattern": "^http://demo\\.datahubproject\\.io" + }, + { + "pattern": "^http://localhost" + }, + { + "pattern": "^http://www.famfamfam.com" + }, + { + "pattern": "^http://www.linkedin.com" + }, + { + "pattern": "\\.md$" + }, + { + "pattern":"\\.json$" + }, + { + "pattern":"\\.txt$" + }, + { + "pattern": "\\.java$" + }, + { + "pattern": "\\.md#.*$" + }, + { + "pattern": "^https://oauth2.googleapis.com/token" + }, + { + "pattern": "^https://login.microsoftonline.com/common/oauth2/na$" + }, + { + "pattern": "#v(\\d+)-(\\d+)-(\\d+)" + }, + { + "pattern": "^https://github.com/mohdsiddique$" + }, + { + "pattern": "^https://github.com/2x$" + }, + { + "pattern": "^https://github.com/datahub-project/datahub/assets/15873986/2f47d033-6c2b-483a-951d-e6d6b807f0d0%22%3E$" + } + ], + "aliveStatusCodes": [200, 206, 0, 999, 400, 401, 403] +} \ No newline at end of file diff --git a/docs-website/package.json b/docs-website/package.json index 400ef4143c786..1722f92169692 100644 --- a/docs-website/package.json +++ b/docs-website/package.json @@ -17,7 +17,7 @@ "generate": "rm -rf genDocs genStatic && mkdir genDocs genStatic && yarn _generate-docs && mv docs/* genDocs/ && rmdir docs", "generate-rsync": "mkdir -p genDocs genStatic && yarn _generate-docs && rsync -v --checksum -r -h -i --delete docs/ genDocs && rm -rf docs", "lint": "prettier -w generateDocsDir.ts sidebars.js src/pages/index.js", - "lint-check": "prettier -l generateDocsDir.ts sidebars.js src/pages/index.js", + "lint-check": "prettier -l generateDocsDir.ts sidebars.js src/pages/index.js && find ./genDocs -name \\*.md -not -path \"./genDocs/python-sdk/models.md\" -print0 | xargs -0 -n1 markdown-link-check -p -q -c markdown-link-check-config.json", "lint-fix": "prettier --write generateDocsDir.ts sidebars.js src/pages/index.js" }, "dependencies": { @@ -37,6 +37,7 @@ "docusaurus-graphql-plugin": "0.5.0", "docusaurus-plugin-sass": "^0.2.1", "dotenv": "^16.0.1", + "markdown-link-check": "^3.11.2", "markprompt": "^0.1.7", "react": "^18.2.0", "react-dom": "18.2.0", diff --git a/docs-website/yarn.lock b/docs-website/yarn.lock index 0613fe71ef78e..5698029bff70a 100644 --- a/docs-website/yarn.lock +++ b/docs-website/yarn.lock @@ -3414,6 +3414,11 @@ async-validator@^4.1.0: resolved "https://registry.yarnpkg.com/async-validator/-/async-validator-4.2.5.tgz#c96ea3332a521699d0afaaceed510a54656c6339" integrity sha512-7HhHjtERjqlNbZtqNqy2rckN/SpOOlmDliet+lP7k+eKZEjPk3DgyeU9lIXLdeLz0uBbbVp+9Qdow9wJWgwwfg== +async@^3.2.4: + version "3.2.4" + resolved "https://registry.yarnpkg.com/async/-/async-3.2.4.tgz#2d22e00f8cddeb5fde5dd33522b56d1cf569a81c" + integrity sha512-iAB+JbDEGXhyIUavoDl9WP/Jj106Kz9DEn1DPgYw5ruDn0e3Wgi3sKFm55sASdGBNOQB8F59d9qQ7deqrHA8wQ== + asynckit@^0.4.0: version "0.4.0" resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79" @@ -3765,6 +3770,11 @@ chalk@^4.0.0, chalk@^4.1.0, chalk@^4.1.2: ansi-styles "^4.1.0" supports-color "^7.1.0" +chalk@^5.2.0: + version "5.3.0" + resolved "https://registry.yarnpkg.com/chalk/-/chalk-5.3.0.tgz#67c20a7ebef70e7f3970a01f90fa210cb6860385" + integrity sha512-dLitG79d+GV1Nb/VYcCDFivJeK1hiukt9QjRNVOsUtTy1rR1YJsmpGGTZ3qJos+uw7WmWF4wUwBd9jxjocFC2w== + character-entities-legacy@^1.0.0: version "1.1.4" resolved "https://registry.yarnpkg.com/character-entities-legacy/-/character-entities-legacy-1.1.4.tgz#94bc1845dce70a5bb9d2ecc748725661293d8fc1" @@ -3797,7 +3807,7 @@ cheerio-select@^2.1.0: domhandler "^5.0.3" domutils "^3.0.1" -cheerio@^1.0.0-rc.12: +cheerio@^1.0.0-rc.10, cheerio@^1.0.0-rc.12: version "1.0.0-rc.12" resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.12.tgz#788bf7466506b1c6bf5fae51d24a2c4d62e47683" integrity sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q== @@ -3984,6 +3994,11 @@ comma-separated-tokens@^2.0.0: resolved "https://registry.yarnpkg.com/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz#4e89c9458acb61bc8fef19f4529973b2392839ee" integrity sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg== +commander@^10.0.1: + version "10.0.1" + resolved "https://registry.yarnpkg.com/commander/-/commander-10.0.1.tgz#881ee46b4f77d1c1dccc5823433aa39b022cbe06" + integrity sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug== + commander@^2.20.0: version "2.20.3" resolved "https://registry.yarnpkg.com/commander/-/commander-2.20.3.tgz#fd485e84c03eb4881c20722ba48035e8531aeb33" @@ -4385,6 +4400,13 @@ debug@4, debug@^4.0.0, debug@^4.1.0, debug@^4.1.1: dependencies: ms "2.1.2" +debug@^3.2.6: + version "3.2.7" + resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.7.tgz#72580b7e9145fb39b6676f9c5e5fb100b934179a" + integrity sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ== + dependencies: + ms "^2.1.1" + decode-named-character-reference@^1.0.0: version "1.0.2" resolved "https://registry.yarnpkg.com/decode-named-character-reference/-/decode-named-character-reference-1.0.2.tgz#daabac9690874c394c81e4162a0304b35d824f0e" @@ -5551,6 +5573,13 @@ html-entities@^2.3.2: resolved "https://registry.yarnpkg.com/html-entities/-/html-entities-2.4.0.tgz#edd0cee70402584c8c76cc2c0556db09d1f45061" integrity sha512-igBTJcNNNhvZFRtm8uA6xMY6xYleeDwn3PeBCkDz7tHttv4F2hsDI2aPgNERWzvRcNYHNT3ymRaQzllmXj4YsQ== +html-link-extractor@^1.0.5: + version "1.0.5" + resolved "https://registry.yarnpkg.com/html-link-extractor/-/html-link-extractor-1.0.5.tgz#a4be345cb13b8c3352d82b28c8b124bb7bf5dd6f" + integrity sha512-ADd49pudM157uWHwHQPUSX4ssMsvR/yHIswOR5CUfBdK9g9ZYGMhVSE6KZVHJ6kCkR0gH4htsfzU6zECDNVwyw== + dependencies: + cheerio "^1.0.0-rc.10" + html-minifier-terser@^6.0.2, html-minifier-terser@^6.1.0: version "6.1.0" resolved "https://registry.yarnpkg.com/html-minifier-terser/-/html-minifier-terser-6.1.0.tgz#bfc818934cc07918f6b3669f5774ecdfd48f32ab" @@ -5673,6 +5702,13 @@ iconv-lite@0.4.24: dependencies: safer-buffer ">= 2.1.2 < 3" +iconv-lite@^0.6.3: + version "0.6.3" + resolved "https://registry.yarnpkg.com/iconv-lite/-/iconv-lite-0.6.3.tgz#a52f80bf38da1952eb5c681790719871a1a72501" + integrity sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw== + dependencies: + safer-buffer ">= 2.1.2 < 3.0.0" + icss-utils@^5.0.0, icss-utils@^5.1.0: version "5.1.0" resolved "https://registry.yarnpkg.com/icss-utils/-/icss-utils-5.1.0.tgz#c6be6858abd013d768e98366ae47e25d5887b1ae" @@ -5795,6 +5831,11 @@ ipaddr.js@^2.0.1: resolved "https://registry.yarnpkg.com/ipaddr.js/-/ipaddr.js-2.1.0.tgz#2119bc447ff8c257753b196fc5f1ce08a4cdf39f" integrity sha512-LlbxQ7xKzfBusov6UMi4MFpEg0m+mAm9xyNGEduwXMEDuf4WfzB/RZwMVYEd7IKGvh4IUkEXYxtAVu9T3OelJQ== +is-absolute-url@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/is-absolute-url/-/is-absolute-url-4.0.1.tgz#16e4d487d4fded05cfe0685e53ec86804a5e94dc" + integrity sha512-/51/TKE88Lmm7Gc4/8btclNXWS+g50wXhYJq8HWIBAGUBnoAdRu1aXeh364t/O7wXDAcTJDP8PNuNKWUDWie+A== + is-alphabetical@1.0.4, is-alphabetical@^1.0.0: version "1.0.4" resolved "https://registry.yarnpkg.com/is-alphabetical/-/is-alphabetical-1.0.4.tgz#9e7d6b94916be22153745d184c298cbf986a686d" @@ -5963,6 +6004,13 @@ is-regexp@^1.0.0: resolved "https://registry.yarnpkg.com/is-regexp/-/is-regexp-1.0.0.tgz#fd2d883545c46bac5a633e7b9a09e87fa2cb5069" integrity sha512-7zjFAPO4/gwyQAAgRRmqeEeyIICSdmCqa3tsVHMdBzaXXRiqopZL4Cyghg/XulGWrtABTpbnYYzzIRffLkP4oA== +is-relative-url@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/is-relative-url/-/is-relative-url-4.0.0.tgz#4d8371999ff6033b76e4d9972fb5bf496fddfa97" + integrity sha512-PkzoL1qKAYXNFct5IKdKRH/iBQou/oCC85QhXj6WKtUQBliZ4Yfd3Zk27RHu9KQG8r6zgvAA2AQKC9p+rqTszg== + dependencies: + is-absolute-url "^4.0.1" + is-root@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/is-root/-/is-root-2.1.0.tgz#809e18129cf1129644302a4f8544035d51984a9c" @@ -6010,6 +6058,13 @@ isarray@~1.0.0: resolved "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11" integrity sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ== +isemail@^3.2.0: + version "3.2.0" + resolved "https://registry.yarnpkg.com/isemail/-/isemail-3.2.0.tgz#59310a021931a9fb06bbb51e155ce0b3f236832c" + integrity sha512-zKqkK+O+dGqevc93KNsbZ/TqTUFd46MwWjYOoMrjIMZ51eU7DtQG3Wmd9SQQT7i7RVnuTPEiYEWHU3MSbxC1Tg== + dependencies: + punycode "2.x.x" + isexe@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10" @@ -6205,6 +6260,16 @@ lines-and-columns@^1.1.6: resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.2.4.tgz#eca284f75d2965079309dc0ad9255abb2ebc1632" integrity sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg== +link-check@^5.2.0: + version "5.2.0" + resolved "https://registry.yarnpkg.com/link-check/-/link-check-5.2.0.tgz#595a339d305900bed8c1302f4342a29c366bf478" + integrity sha512-xRbhYLaGDw7eRDTibTAcl6fXtmUQ13vkezQiTqshHHdGueQeumgxxmQMIOmJYsh2p8BF08t8thhDQ++EAOOq3w== + dependencies: + is-relative-url "^4.0.0" + isemail "^3.2.0" + ms "^2.1.3" + needle "^3.1.0" + loader-runner@^4.2.0: version "4.3.0" resolved "https://registry.yarnpkg.com/loader-runner/-/loader-runner-4.3.0.tgz#c1b4a163b99f614830353b16755e7149ac2314e1" @@ -6366,6 +6431,28 @@ markdown-escapes@^1.0.0: resolved "https://registry.yarnpkg.com/markdown-escapes/-/markdown-escapes-1.0.4.tgz#c95415ef451499d7602b91095f3c8e8975f78535" integrity sha512-8z4efJYk43E0upd0NbVXwgSTQs6cT3T06etieCMEg7dRbzCbxUCK/GHlX8mhHRDcp+OLlHkPKsvqQTCvsRl2cg== +markdown-link-check@^3.11.2: + version "3.11.2" + resolved "https://registry.yarnpkg.com/markdown-link-check/-/markdown-link-check-3.11.2.tgz#303a8a03d4a34c42ef3158e0b245bced26b5d904" + integrity sha512-zave+vI4AMeLp0FlUllAwGbNytSKsS3R2Zgtf3ufVT892Z/L6Ro9osZwE9PNA7s0IkJ4onnuHqatpsaCiAShJw== + dependencies: + async "^3.2.4" + chalk "^5.2.0" + commander "^10.0.1" + link-check "^5.2.0" + lodash "^4.17.21" + markdown-link-extractor "^3.1.0" + needle "^3.2.0" + progress "^2.0.3" + +markdown-link-extractor@^3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/markdown-link-extractor/-/markdown-link-extractor-3.1.0.tgz#0d5a703630d791a9e2017449e1a9b294f2d2b676" + integrity sha512-r0NEbP1dsM+IqB62Ru9TXLP/HDaTdBNIeylYXumuBi6Xv4ufjE1/g3TnslYL8VNqNcGAGbMptQFHrrdfoZ/Sug== + dependencies: + html-link-extractor "^1.0.5" + marked "^4.1.0" + markdown-table@^3.0.0: version "3.0.3" resolved "https://registry.yarnpkg.com/markdown-table/-/markdown-table-3.0.3.tgz#e6331d30e493127e031dd385488b5bd326e4a6bd" @@ -6376,6 +6463,11 @@ marked@^2.0.3: resolved "https://registry.yarnpkg.com/marked/-/marked-2.1.3.tgz#bd017cef6431724fd4b27e0657f5ceb14bff3753" integrity sha512-/Q+7MGzaETqifOMWYEA7HVMaZb4XbcRfaOzcSsHZEith83KGlvaSG33u0SKu89Mj5h+T8V2hM+8O45Qc5XTgwA== +marked@^4.1.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/marked/-/marked-4.3.0.tgz#796362821b019f734054582038b116481b456cf3" + integrity sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A== + markprompt@^0.1.7: version "0.1.7" resolved "https://registry.yarnpkg.com/markprompt/-/markprompt-0.1.7.tgz#fa049e11109d93372c45c38b3ca40bd5fdf751ea" @@ -6978,7 +7070,7 @@ ms@2.1.2: resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009" integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w== -ms@2.1.3: +ms@2.1.3, ms@^2.1.1, ms@^2.1.3: version "2.1.3" resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2" integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA== @@ -7001,6 +7093,15 @@ napi-build-utils@^1.0.1: resolved "https://registry.yarnpkg.com/napi-build-utils/-/napi-build-utils-1.0.2.tgz#b1fddc0b2c46e380a0b7a76f984dd47c41a13806" integrity sha512-ONmRUqK7zj7DWX0D9ADe03wbwOBZxNAfF20PlGfCWQcD3+/MakShIHrMqx9YwPTfxDdF1zLeL+RGZiR9kGMLdg== +needle@^3.1.0, needle@^3.2.0: + version "3.2.0" + resolved "https://registry.yarnpkg.com/needle/-/needle-3.2.0.tgz#07d240ebcabfd65c76c03afae7f6defe6469df44" + integrity sha512-oUvzXnyLiVyVGoianLijF9O/RecZUf7TkBfimjGrLM4eQhXyeJwM6GeAWccwfQ9aa4gMCZKqhAOuLaMIcQxajQ== + dependencies: + debug "^3.2.6" + iconv-lite "^0.6.3" + sax "^1.2.4" + negotiator@0.6.3: version "0.6.3" resolved "https://registry.yarnpkg.com/negotiator/-/negotiator-0.6.3.tgz#58e323a72fedc0d6f9cd4d31fe49f51479590ccd" @@ -7753,6 +7854,11 @@ process-nextick-args@~2.0.0: resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2" integrity sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag== +progress@^2.0.3: + version "2.0.3" + resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8" + integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA== + promise@^7.1.1: version "7.3.1" resolved "https://registry.yarnpkg.com/promise/-/promise-7.3.1.tgz#064b72602b18f90f29192b8b1bc418ffd1ebd3bf" @@ -7805,16 +7911,16 @@ pump@^3.0.0: end-of-stream "^1.1.0" once "^1.3.1" +punycode@2.x.x, punycode@^2.1.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.3.0.tgz#f67fa67c94da8f4d0cfff981aee4118064199b8f" + integrity sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA== + punycode@^1.3.2: version "1.4.1" resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.4.1.tgz#c0d5a63b2718800ad8e1eb0fa5269c84dd41845e" integrity sha512-jmYNElW7yvO7TV33CjSmvSiE2yco3bV2czu/OzDKdMNVZQWfxCblURLhf+47syQRBntjfLdd/H0egrzIG+oaFQ== -punycode@^2.1.0: - version "2.3.0" - resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.3.0.tgz#f67fa67c94da8f4d0cfff981aee4118064199b8f" - integrity sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA== - pupa@^2.1.1: version "2.1.1" resolved "https://registry.yarnpkg.com/pupa/-/pupa-2.1.1.tgz#f5e8fd4afc2c5d97828faa523549ed8744a20d62" @@ -8789,7 +8895,7 @@ safe-buffer@5.2.1, safe-buffer@>=5.1.0, safe-buffer@^5.0.1, safe-buffer@^5.1.0, resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6" integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ== -"safer-buffer@>= 2.1.2 < 3": +"safer-buffer@>= 2.1.2 < 3", "safer-buffer@>= 2.1.2 < 3.0.0": version "2.1.2" resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a" integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg== diff --git a/docs/advanced/no-code-modeling.md b/docs/advanced/no-code-modeling.md index d76b776d3dddb..172e63f821eab 100644 --- a/docs/advanced/no-code-modeling.md +++ b/docs/advanced/no-code-modeling.md @@ -100,10 +100,9 @@ Currently, there are various models in GMS: 1. [Urn](https://github.com/datahub-project/datahub/blob/master/li-utils/src/main/pegasus/com/linkedin/common/DatasetUrn.pdl) - Structs composing primary keys 2. [Root] [Snapshots](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/snapshot/Snapshot.pdl) - Container of aspects 3. [Aspects](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/DashboardAspect.pdl) - Optional container of fields -4. [Values](https://github.com/datahub-project/datahub/blob/master/gms/api/src/main/pegasus/com/linkedin/dataset/Dataset.pdl), [Keys](https://github.com/datahub-project/datahub/blob/master/gms/api/src/main/pegasus/com/linkedin/dataset/DatasetKey.pdl) - Model returned by GMS [Rest.li](http://rest.li) API (public facing) -5. [Entities](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/entity/DatasetEntity.pdl) - Records with fields derived from the URN. Used only in graph / relationships -6. [Relationships](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/relationship/Relationship.pdl) - Edges between 2 entities with optional edge properties -7. [Search Documents](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/search/ChartDocument.pdl) - Flat documents for indexing within Elastic index +4. [Keys](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DatasetKey.pdl) - Model returned by GMS [Rest.li](http://rest.li) API (public facing) +5. [Relationships](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/common/EntityRelationship.pdl) - Edges between 2 entities with optional edge properties +6. Search Documents - Flat documents for indexing within Elastic index - And corresponding index [mappings.json](https://github.com/datahub-project/datahub/blob/master/gms/impl/src/main/resources/index/chart/mappings.json), [settings.json](https://github.com/datahub-project/datahub/blob/master/gms/impl/src/main/resources/index/chart/settings.json) Various components of GMS depend on / make assumptions about these model types: diff --git a/docs/api/graphql/how-to-set-up-graphql.md b/docs/api/graphql/how-to-set-up-graphql.md index 584bf34ad3f92..2be2f935b12b1 100644 --- a/docs/api/graphql/how-to-set-up-graphql.md +++ b/docs/api/graphql/how-to-set-up-graphql.md @@ -68,7 +68,7 @@ In the request body, select the `GraphQL` option and enter your GraphQL query in

-Please refer to [Querying with GraphQL](https://learning.postman.com/docs/sending-requests/graphql/graphql/) in the Postman documentation for more information. +Please refer to [Querying with GraphQL](https://learning.postman.com/docs/sending-requests/graphql/graphql-overview/) in the Postman documentation for more information. ### Authentication + Authorization diff --git a/docs/architecture/architecture.md b/docs/architecture/architecture.md index 6a9c1860d71b0..20f18f09d949b 100644 --- a/docs/architecture/architecture.md +++ b/docs/architecture/architecture.md @@ -17,7 +17,7 @@ The figures below describe the high-level architecture of DataHub.

- +

diff --git a/docs/authentication/guides/add-users.md b/docs/authentication/guides/add-users.md index f5dfc83201083..d380cacd6665e 100644 --- a/docs/authentication/guides/add-users.md +++ b/docs/authentication/guides/add-users.md @@ -19,13 +19,13 @@ To do so, navigate to the **Users & Groups** section inside of Settings page. He do not have the correct privileges to invite users, this button will be disabled.

- +

To invite new users, simply share the link with others inside your organization.

- +

When a new user visits the link, they will be directed to a sign up screen where they can create their DataHub account. @@ -37,13 +37,13 @@ and click **Reset user password** inside the menu dropdown on the right hand sid `Manage User Credentials` [Platform Privilege](../../authorization/access-policies-guide.md) in order to reset passwords.

- +

To reset the password, simply share the password reset link with the user who needs to change their password. Password reset links expire after 24 hours.

- +

# Configuring Single Sign-On with OpenID Connect diff --git a/docs/authentication/guides/sso/configure-oidc-react.md b/docs/authentication/guides/sso/configure-oidc-react.md index d27792ce3967b..512d6adbf916f 100644 --- a/docs/authentication/guides/sso/configure-oidc-react.md +++ b/docs/authentication/guides/sso/configure-oidc-react.md @@ -26,7 +26,7 @@ please see [this guide](../jaas.md) to mount a custom user.props file for a JAAS To configure OIDC in React, you will most often need to register yourself as a client with your identity provider (Google, Okta, etc). Each provider may have their own instructions. Provided below are links to examples for Okta, Google, Azure AD, & Keycloak. -- [Registering an App in Okta](https://developer.okta.com/docs/guides/add-an-external-idp/apple/register-app-in-okta/) +- [Registering an App in Okta](https://developer.okta.com/docs/guides/add-an-external-idp/openidconnect/main/) - [OpenID Connect in Google Identity](https://developers.google.com/identity/protocols/oauth2/openid-connect) - [OpenID Connect authentication with Azure Active Directory](https://docs.microsoft.com/en-us/azure/active-directory/fundamentals/auth-oidc) - [Keycloak - Securing Applications and Services Guide](https://www.keycloak.org/docs/latest/securing_apps/) diff --git a/docs/cli.md b/docs/cli.md index eb8bb406b0107..267f289d9f54a 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -547,7 +547,7 @@ Old Entities Migrated = {'urn:li:dataset:(urn:li:dataPlatform:hive,logging_event ### Using docker [![Docker Hub](https://img.shields.io/docker/pulls/acryldata/datahub-ingestion?style=plastic)](https://hub.docker.com/r/acryldata/datahub-ingestion) -[![datahub-ingestion docker](https://github.com/acryldata/datahub/actions/workflows/docker-ingestion.yml/badge.svg)](https://github.com/acryldata/datahub/actions/workflows/docker-ingestion.yml) +[![datahub-ingestion docker](https://github.com/acryldata/datahub/workflows/datahub-ingestion%20docker/badge.svg)](https://github.com/acryldata/datahub/actions/workflows/docker-ingestion.yml) If you don't want to install locally, you can alternatively run metadata ingestion within a Docker container. We have prebuilt images available on [Docker hub](https://hub.docker.com/r/acryldata/datahub-ingestion). All plugins will be installed and enabled automatically. diff --git a/docs/domains.md b/docs/domains.md index c846a753417c5..1b2ebc9d47f39 100644 --- a/docs/domains.md +++ b/docs/domains.md @@ -22,20 +22,20 @@ You can create this privileges by creating a new [Metadata Policy](./authorizati To create a Domain, first navigate to the **Domains** tab in the top-right menu of DataHub.

- +

Once you're on the Domains page, you'll see a list of all the Domains that have been created on DataHub. Additionally, you can view the number of entities inside each Domain.

- +

To create a new Domain, click '+ New Domain'.

- +

Inside the form, you can choose a name for your Domain. Most often, this will align with your business units or groups, for example @@ -48,7 +48,7 @@ for the Domain. This option is useful if you intend to refer to Domains by a com key to be human-readable. Proceed with caution: once you select a custom id, it cannot be easily changed.

- +

By default, you don't need to worry about this. DataHub will auto-generate a unique Domain id for you. @@ -64,7 +64,7 @@ To assign an asset to a Domain, simply navigate to the asset's profile page. At see a 'Domain' section. Click 'Set Domain', and then search for the Domain you'd like to add to. When you're done, click 'Add'.

- +

To remove an asset from a Domain, click the 'x' icon on the Domain tag. @@ -149,27 +149,27 @@ source: Once you've created a Domain, you can use the search bar to find it.

- +

Clicking on the search result will take you to the Domain's profile, where you can edit its description, add / remove owners, and view the assets inside the Domain.

- +

Once you've added assets to a Domain, you can filter search results to limit to those Assets within a particular Domain using the left-side search filters.

- +

On the homepage, you'll also find a list of the most popular Domains in your organization.

- +

## Additional Resources @@ -242,7 +242,6 @@ DataHub supports Tags, Glossary Terms, & Domains as distinct types of Metadata t - **Tags**: Informal, loosely controlled labels that serve as a tool for search & discovery. Assets may have multiple tags. No formal, central management. - **Glossary Terms**: A controlled vocabulary, with optional hierarchy. Terms are typically used to standardize types of leaf-level attributes (i.e. schema fields) for governance. E.g. (EMAIL_PLAINTEXT) - **Domains**: A set of top-level categories. Usually aligned to business units / disciplines to which the assets are most relevant. Central or distributed management. Single Domain assignment per data asset. - *Need more help? Join the conversation in [Slack](http://slack.datahubproject.io)!* ### Related Features diff --git a/docs/how/add-new-aspect.md b/docs/how/add-new-aspect.md index 6ea7256ed75cc..d1fe567018903 100644 --- a/docs/how/add-new-aspect.md +++ b/docs/how/add-new-aspect.md @@ -1,20 +1,20 @@ # How to add a new metadata aspect? Adding a new metadata [aspect](../what/aspect.md) is one of the most common ways to extend an existing [entity](../what/entity.md). -We'll use the [CorpUserEditableInfo](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/identity/CorpUserEditableInfo.pdl) as an example here. +We'll use the CorpUserEditableInfo as an example here. 1. Add the aspect model to the corresponding namespace (e.g. [`com.linkedin.identity`](https://github.com/datahub-project/datahub/tree/master/metadata-models/src/main/pegasus/com/linkedin/identity)) -2. Extend the entity's aspect union to include the new aspect (e.g. [`CorpUserAspect`](https://github.com/datahub-project/datahub/blob/master/metadata-models/src/main/pegasus/com/linkedin/metadata/aspect/CorpUserAspect.pdl)) +2. Extend the entity's aspect union to include the new aspect. 3. Rebuild the rest.li [IDL & snapshot](https://linkedin.github.io/rest.li/modeling/compatibility_check) by running the following command from the project root ``` ./gradlew :metadata-service:restli-servlet-impl:build -Prest.model.compatibility=ignore ``` -4. To surface the new aspect at the top-level [resource endpoint](https://linkedin.github.io/rest.li/user_guide/restli_server#writing-resources), extend the resource data model (e.g. [`CorpUser`](https://github.com/datahub-project/datahub/blob/master/gms/api/src/main/pegasus/com/linkedin/identity/CorpUser.pdl)) with an optional field (e.g. [`editableInfo`](https://github.com/datahub-project/datahub/blob/master/gms/api/src/main/pegasus/com/linkedin/identity/CorpUser.pdl#L21)). You'll also need to extend the `toValue` & `toSnapshot` methods of the top-level resource (e.g. [`CorpUsers`](https://github.com/datahub-project/datahub/blob/master/gms/impl/src/main/java/com/linkedin/metadata/resources/identity/CorpUsers.java)) to convert between the snapshot & value models. +4. To surface the new aspect at the top-level [resource endpoint](https://linkedin.github.io/rest.li/user_guide/restli_server#writing-resources), extend the resource data model with an optional field. You'll also need to extend the `toValue` & `toSnapshot` methods of the top-level resource (e.g. [`CorpUsers`](https://github.com/datahub-project/datahub/blob/master/gms/impl/src/main/java/com/linkedin/metadata/resources/identity/CorpUsers.java)) to convert between the snapshot & value models. -5. (Optional) If there's need to update the aspect via API (instead of/in addition to MCE), add a [sub-resource](https://linkedin.github.io/rest.li/user_guide/restli_server#sub-resources) endpoint for the new aspect (e.g. [`CorpUsersEditableInfoResource`](https://github.com/datahub-project/datahub/blob/master/gms/impl/src/main/java/com/linkedin/metadata/resources/identity/CorpUsersEditableInfoResource.java)). The sub-resource endpiont also allows you to retrieve previous versions of the aspect as well as additional metadata such as the audit stamp. +5. (Optional) If there's need to update the aspect via API (instead of/in addition to MCE), add a [sub-resource](https://linkedin.github.io/rest.li/user_guide/restli_server#sub-resources) endpoint for the new aspect (e.g. `CorpUsersEditableInfoResource`). The sub-resource endpiont also allows you to retrieve previous versions of the aspect as well as additional metadata such as the audit stamp. -6. After rebuilding & restarting [gms](https://github.com/datahub-project/datahub/tree/master/gms), [mce-consumer-job](https://github.com/datahub-project/datahub/tree/master/metadata-jobs/mce-consumer-job) & [mae-consumer-job](https://github.com/datahub-project/datahub/tree/master/metadata-jobs/mae-consumer-job), +6. After rebuilding & restarting gms, [mce-consumer-job](https://github.com/datahub-project/datahub/tree/master/metadata-jobs/mce-consumer-job) & [mae-consumer-job](https://github.com/datahub-project/datahub/tree/master/metadata-jobs/mae-consumer-job),z you should be able to start emitting [MCE](../what/mxe.md) with the new aspect and have it automatically ingested & stored in DB. diff --git a/docs/modeling/extending-the-metadata-model.md b/docs/modeling/extending-the-metadata-model.md index 98f70f6d933e4..be2d7d795de70 100644 --- a/docs/modeling/extending-the-metadata-model.md +++ b/docs/modeling/extending-the-metadata-model.md @@ -24,7 +24,7 @@ We will refer to the two options as the **open-source fork** and **custom reposi ## This Guide This guide will outline what the experience of adding a new Entity should look like through a real example of adding the -Dashboard Entity. If you want to extend an existing Entity, you can skip directly to [Step 3](#step_3). +Dashboard Entity. If you want to extend an existing Entity, you can skip directly to [Step 3](#step-3-define-custom-aspects-or-attach-existing-aspects-to-your-entity). At a high level, an entity is made up of: @@ -82,14 +82,14 @@ Because they are aspects, keys need to be annotated with an @Aspect annotation, can be a part of. The key can also be annotated with the two index annotations: @Relationship and @Searchable. This instructs DataHub -infra to use the fields in the key to create relationships and index fields for search. See [Step 3](#step_3) for more details on +infra to use the fields in the key to create relationships and index fields for search. See [Step 3](#step-3-define-custom-aspects-or-attach-existing-aspects-to-your-entity) for more details on the annotation model. **Constraints**: Note that each field in a Key Aspect MUST be of String or Enum type. ### Step 2: Create the new entity with its key aspect -Define the entity within an `entity-registry.yml` file. Depending on your approach, the location of this file may vary. More on that in steps [4](#step_4) and [5](#step_5). +Define the entity within an `entity-registry.yml` file. Depending on your approach, the location of this file may vary. More on that in steps [4](#step-4-choose-a-place-to-store-your-model-extension) and [5](#step-5-attaching-your-non-key-aspects-to-the-entity). Example: ```yaml @@ -212,11 +212,11 @@ After you create your Aspect, you need to attach to all the entities that it app **Constraints**: Note that all aspects MUST be of type Record. -### Step 4: Choose a place to store your model extension +### Step 4: Choose a place to store your model extension At the beginning of this document, we walked you through a flow-chart that should help you decide whether you need to maintain a fork of the open source DataHub repo for your model extensions, or whether you can just use a model extension repository that can stay independent of the DataHub repo. Depending on what path you took, the place you store your aspect model files (the .pdl files) and the entity-registry files (the yaml file called `entity-registry.yaml` or `entity-registry.yml`) will vary. -- Open source Fork: Aspect files go under [`metadata-models`](../../metadata-models) module in the main repo, entity registry goes into [`metadata-models/src/main/resources/entity-registry.yml`](../../metadata-models/src/main/resources/entity-registry.yml). Read on for more details in [Step 5](#step_5). +- Open source Fork: Aspect files go under [`metadata-models`](../../metadata-models) module in the main repo, entity registry goes into [`metadata-models/src/main/resources/entity-registry.yml`](../../metadata-models/src/main/resources/entity-registry.yml). Read on for more details in [Step 5](#step-5-attaching-your-non-key-aspects-to-the-entity). - Custom repository: Read the [metadata-models-custom](../../metadata-models-custom/README.md) documentation to learn how to store and version your aspect models and registry. ### Step 5: Attaching your non-key Aspect(s) to the Entity diff --git a/docs/modeling/metadata-model.md b/docs/modeling/metadata-model.md index 037c9c7108a6e..a8958985a0a72 100644 --- a/docs/modeling/metadata-model.md +++ b/docs/modeling/metadata-model.md @@ -433,7 +433,7 @@ aggregation query against a timeseries aspect. The *@TimeseriesField* and the *@TimeseriesFieldCollection* are two new annotations that can be attached to a field of a *Timeseries aspect* that allows it to be part of an aggregatable query. The kinds of aggregations allowed on these annotated fields depends on the type of the field, as well as the kind of aggregation, as -described [here](#Performing-an-aggregation-on-a-Timeseries-aspect). +described [here](#performing-an-aggregation-on-a-timeseries-aspect). * `@TimeseriesField = {}` - this annotation can be used with any type of non-collection type field of the aspect such as primitive types and records (see the fields *stat*, *strStat* and *strArray* fields @@ -515,7 +515,7 @@ my_emitter = DatahubRestEmitter("http://localhost:8080") my_emitter.emit(mcpw) ``` -###### Performing an aggregation on a Timeseries aspect. +###### Performing an aggregation on a Timeseries aspect Aggreations on timeseries aspects can be performed by the GMS REST API for `/analytics?action=getTimeseriesStats` which accepts the following params. diff --git a/docs/tags.md b/docs/tags.md index 945b514dc7b47..cb08c9fafea49 100644 --- a/docs/tags.md +++ b/docs/tags.md @@ -27,25 +27,25 @@ You can create these privileges by creating a new [Metadata Policy](./authorizat To add a tag at the dataset or container level, simply navigate to the page for that entity and click on the **Add Tag** button.

- +

Type in the name of the tag you want to add. You can add a new tag, or add a tag that already exists (the autocomplete will pull up the tag if it already exists).

- +

Click on the "Add" button and you'll see the tag has been added!

- +

If you would like to add a tag at the schema level, hover over the "Tags" column for a schema until the "Add Tag" button shows up, and then follow the same flow as above.

- +

### Removing a Tag @@ -57,7 +57,7 @@ To remove a tag, simply click on the "X" button in the tag. Then click "Yes" whe You can search for a tag in the search bar, and even filter entities by the presence of a specific tag.

- +

## Additional Resources diff --git a/docs/townhall-history.md b/docs/townhall-history.md index e235a70c5d7b9..d92905af0cd72 100644 --- a/docs/townhall-history.md +++ b/docs/townhall-history.md @@ -328,7 +328,7 @@ November Town Hall (in December!) * Welcome - 5 mins * Latest React App Demo! ([video](https://www.youtube.com/watch?v=RQBEJhcen5E)) by John Joyce and Gabe Lyons - 5 mins -* Use-Case: DataHub at Geotab ([slides](https://docs.google.com/presentation/d/1qcgO3BW5NauuG0HnPqrxGcujsK-rJ1-EuU-7cbexkqE/edit?usp=sharing),[video](https://www.youtube.com/watch?v=boyjT2OrlU4)) by [John Yoon](https://www.linkedin.com/in/yhjyoon/) - 15 mins +* Use-Case: DataHub at Geotab ([video](https://www.youtube.com/watch?v=boyjT2OrlU4)) by [John Yoon](https://www.linkedin.com/in/yhjyoon/) - 15 mins * Tech Deep Dive: Tour of new pull-based Python Ingestion scripts ([slides](https://docs.google.com/presentation/d/15Xay596WDIhzkc5c8DEv6M-Bv1N4hP8quup1tkws6ms/edit#slide=id.gb478361595_0_10),[video](https://www.youtube.com/watch?v=u0IUQvG-_xI)) by [Harshal Sheth](https://www.linkedin.com/in/hsheth2/) - 15 mins * General Q&A from sign up sheet, slack, and participants - 15 mins * Closing remarks - 5 mins diff --git a/docs/what/gms.md b/docs/what/gms.md index 9e1cea1b9540e..a39450d28ae83 100644 --- a/docs/what/gms.md +++ b/docs/what/gms.md @@ -2,6 +2,4 @@ Metadata for [entities](entity.md) [onboarded](../modeling/metadata-model.md) to [GMA](gma.md) is served through microservices known as Generalized Metadata Service (GMS). GMS typically provides a [Rest.li](http://rest.li) API and must access the metadata using [GMA DAOs](../architecture/metadata-serving.md). -While a GMS is completely free to define its public APIs, we do provide a list of [resource base classes](https://github.com/datahub-project/datahub-gma/tree/master/restli-resources/src/main/java/com/linkedin/metadata/restli) to leverage for common patterns. - -GMA is designed to support a distributed fleet of GMS, each serving a subset of the [GMA graph](graph.md). However, for simplicity we include a single centralized GMS ([datahub-gms](../../gms)) that serves all entities. +GMA is designed to support a distributed fleet of GMS, each serving a subset of the [GMA graph](graph.md). However, for simplicity we include a single centralized GMS that serves all entities. diff --git a/docs/what/mxe.md b/docs/what/mxe.md index 8af96360858a3..25294e04ea3d9 100644 --- a/docs/what/mxe.md +++ b/docs/what/mxe.md @@ -266,7 +266,7 @@ A Metadata Change Event represents a request to change multiple aspects for the It leverages a deprecated concept of `Snapshot`, which is a strongly-typed list of aspects for the same entity. -A MCE is a "proposal" for a set of metadata changes, as opposed to [MAE](#metadata-audit-event), which is conveying a committed change. +A MCE is a "proposal" for a set of metadata changes, as opposed to [MAE](#metadata-audit-event-mae), which is conveying a committed change. Consequently, only successfully accepted and processed MCEs will lead to the emission of a corresponding MAE / MCLs. ### Emission diff --git a/docs/what/relationship.md b/docs/what/relationship.md index dcfe093a1b124..d5348dc04b3c0 100644 --- a/docs/what/relationship.md +++ b/docs/what/relationship.md @@ -102,9 +102,6 @@ For one, the actual direction doesn’t really impact the execution of graph que That being said, generally there’s a more "natural way" to specify the direction of a relationship, which closely relate to how the metadata is stored. For example, the membership information for an LDAP group is generally stored as a list in group’s metadata. As a result, it’s more natural to model a `HasMember` relationship that points from a group to a member, instead of a `IsMemberOf` relationship pointing from member to group. -Since all relationships are explicitly declared, it’s fairly easy for a user to discover what relationships are available and their directionality by inspecting -the [relationships directory](../../metadata-models/src/main/pegasus/com/linkedin/metadata/relationship). It’s also possible to provide a UI for the catalog of entities and relationships for analysts who are interested in building complex graph queries to gain insights into the metadata. - ## High Cardinality Relationships See [this doc](../advanced/high-cardinality.md) for suggestions on how to best model relationships with high cardinality. diff --git a/docs/what/search-document.md b/docs/what/search-document.md index 81359a55d0cae..bd27656e512c3 100644 --- a/docs/what/search-document.md +++ b/docs/what/search-document.md @@ -13,7 +13,6 @@ As a result, one may be tempted to add as many attributes as needed. This is acc Below shows an example schema for the `User` search document. Note that: 1. Each search document is required to have a type-specific `urn` field, generally maps to an entity in the [graph](graph.md). 2. Similar to `Entity`, each document has an optional `removed` field for "soft deletion". -This is captured in [BaseDocument](../../metadata-models/src/main/pegasus/com/linkedin/metadata/search/BaseDocument.pdl), which is expected to be included by all documents. 3. Similar to `Entity`, all remaining fields are made `optional` to support partial updates. 4. `management` shows an example of a string array field. 5. `ownedDataset` shows an example on how a field can be derived from metadata [aspects](aspect.md) associated with other types of entity (in this case, `Dataset`). diff --git a/metadata-ingestion/docs/dev_guides/add_stateful_ingestion_to_source.md b/metadata-ingestion/docs/dev_guides/add_stateful_ingestion_to_source.md index 6a1204fb0f2b3..9e39d24fb8578 100644 --- a/metadata-ingestion/docs/dev_guides/add_stateful_ingestion_to_source.md +++ b/metadata-ingestion/docs/dev_guides/add_stateful_ingestion_to_source.md @@ -60,16 +60,14 @@ class StaleEntityCheckpointStateBase(CheckpointStateBase, ABC, Generic[Derived]) ``` Examples: -1. [KafkaCheckpointState](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/kafka_state.py#L11). -2. [DbtCheckpointState](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/dbt_state.py#L16) -3. [BaseSQLAlchemyCheckpointState](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/sql_common_state.py#L17) +* [BaseSQLAlchemyCheckpointState](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/sql_common_state.py#L17) ### 2. Modifying the SourceConfig The source's config must inherit from `StatefulIngestionConfigBase`, and should declare a field named `stateful_ingestion` of type `Optional[StatefulStaleMetadataRemovalConfig]`. Examples: -1. The `KafkaSourceConfig` +- The `KafkaSourceConfig` ```python from typing import List, Optional import pydantic @@ -84,9 +82,6 @@ class KafkaSourceConfig(StatefulIngestionConfigBase): stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None ``` -2. The [DBTStatefulIngestionConfig](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/dbt.py#L131) - and the [DBTConfig](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/dbt.py#L317). - ### 3. Modifying the SourceReport The report class of the source should inherit from `StaleEntityRemovalSourceReport` whose definition is shown below. ```python @@ -102,7 +97,7 @@ class StaleEntityRemovalSourceReport(StatefulIngestionReport): ``` Examples: -1. The `KafkaSourceReport` +* The `KafkaSourceReport` ```python from dataclasses import dataclass from datahub.ingestion.source.state.stale_entity_removal_handler import StaleEntityRemovalSourceReport @@ -110,7 +105,7 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import StaleEnt class KafkaSourceReport(StaleEntityRemovalSourceReport): #