Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dbt docs natively in Airflow via plugin #737

Merged
merged 33 commits into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
020e9e4
add dbt docs hosting support
dwreeves Dec 2, 2023
266cb23
Merge branch 'main' into add-dbt-docs-support
tatiana Dec 4, 2023
f3be3dd
patch
dwreeves Dec 5, 2023
0a7186f
resolve merge conflict
dwreeves Dec 5, 2023
238af68
Merge branch 'add-dbt-docs-support' of github.com-dwreeves:dwreeves/a…
dwreeves Dec 5, 2023
e2cd226
Merge branch 'main' into add-dbt-docs-support
tatiana Dec 5, 2023
e589035
update tests for more coverage
dwreeves Dec 6, 2023
ab47633
merge
dwreeves Dec 6, 2023
92ac35b
Update test_plugin.py
dwreeves Dec 6, 2023
fe92ff8
Update test_plugin.py
dwreeves Dec 6, 2023
267fd89
Update test_plugin.py
dwreeves Dec 6, 2023
6be6935
Update test_plugin.py
dwreeves Dec 6, 2023
6284220
Merge branch 'main' into add-dbt-docs-support
dwreeves Dec 6, 2023
b8a886e
Update test_plugin.py
dwreeves Dec 6, 2023
daef1c8
Update test_plugin.py
dwreeves Dec 6, 2023
02911cf
update tests
dwreeves Dec 8, 2023
e5aeff6
Merge branch 'main' into add-dbt-docs-support
dwreeves Dec 8, 2023
465228c
Merge branch 'main' into add-dbt-docs-support
tatiana Dec 13, 2023
563115b
Merge branch 'main' into add-dbt-docs-support
tatiana Dec 13, 2023
4fad6be
Merge branch 'main' into add-dbt-docs-support
tatiana Dec 14, 2023
ab42fdd
Merge branch 'main' into add-dbt-docs-support
dwreeves Dec 30, 2023
d0fd401
Merge branch 'main' into add-dbt-docs-support
dwreeves Jan 4, 2024
7c9d8d3
updates
dwreeves Jan 6, 2024
c67f05d
fix test
dwreeves Jan 7, 2024
0116db0
Merge branch 'main' into add-dbt-docs-support
dwreeves Jan 7, 2024
548af84
Merge branch 'main' into add-dbt-docs-support
tatiana Jan 8, 2024
9c1f19c
Merge branch 'main' into add-dbt-docs-support
tatiana Jan 18, 2024
1a7726d
Merge branch 'main' into add-dbt-docs-support
tatiana Jan 18, 2024
28c9c73
Merge branch 'main' into add-dbt-docs-support
tatiana Jan 23, 2024
8505bd2
Merge branch 'main' into add-dbt-docs-support
tatiana Jan 29, 2024
6f39181
Merge branch 'main' into add-dbt-docs-support
dwreeves Feb 15, 2024
a2395b0
Update hosting-docs.rst
dwreeves Feb 20, 2024
62c5b3a
Merge branch 'main' into add-dbt-docs-support
jbandoro Feb 20, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ repos:
types: [text]
args:
- --exclude-file=tests/sample/manifest_model_version.json
- --skip=**/manifest.json
- --skip=**/manifest.json,**.min.js
- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.10.0
hooks:
Expand Down
207 changes: 207 additions & 0 deletions cosmos/plugin/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
import os.path as op
from typing import Any, Dict, Optional, Tuple
from urllib.parse import urlsplit

from airflow.configuration import conf
from airflow.exceptions import AirflowConfigException, AirflowNotFoundException
from airflow.plugins_manager import AirflowPlugin
from airflow.security import permissions
from airflow.www.auth import has_access
from airflow.www.views import AirflowBaseView
from flask import abort, url_for
from flask_appbuilder import AppBuilder, expose


def bucket_and_key(path: str) -> Tuple[str, str]:
    """Split an object-storage URL into its bucket (or container) and object key.

    :param path: A URL such as ``gs://bucket/some/key`` or ``wasb://container/some/blob``.
        Surrounding whitespace is ignored, matching how ``open_file`` detects the scheme.
    :returns: ``(bucket, key)`` where ``bucket`` is the URL's network location and
        ``key`` is the URL path without its leading slash(es).
    """
    # Strip first: without this, trailing whitespace ends up inside the key, and
    # leading whitespace prevents the scheme/netloc from being recognised on
    # Python versions whose ``urlsplit`` does not strip it itself.
    parsed_url = urlsplit(path.strip())
    bucket = parsed_url.netloc
    key = parsed_url.path.lstrip("/")
    return bucket, key


def open_file(path: str) -> str:  # noqa: C901
    """Retrieve the text content of a file from http, https, gs, s3, wasb, or local disk.

    The backend is selected from the scheme prefix of ``path`` (ignoring surrounding
    whitespace). Anything without a recognised scheme is treated as a local file path.

    The Airflow connection used by the remote hooks is read from the ``[cosmos]
    dbt_docs_conn_id`` configuration option. When that option is not set, each hook
    is constructed without an explicit connection id.

    :param path: Location of the file, e.g. ``s3://bucket/key``, ``gs://bucket/key``,
        ``wasb://container/blob``, ``https://host/file`` or ``/local/file``.
    :returns: The file content as text.
    :raises FileNotFoundError: If a local file does not exist. Errors raised by the
        remote hooks are propagated unchanged.
    """
    try:
        conn_id: Optional[str] = conf.get("cosmos", "dbt_docs_conn_id")
    except AirflowConfigException:
        conn_id = None

    # Normalise once so that the value used for scheme detection is the same value
    # handed to the remote hooks (previously the unstripped path was passed on).
    location = path.strip()

    if location.startswith("s3://"):
        from airflow.providers.amazon.aws.hooks.s3 import S3Hook

        if conn_id is None:
            hook = S3Hook()
        else:
            hook = S3Hook(aws_conn_id=conn_id)
        bucket, key = hook.parse_s3_url(location)
        content = hook.read_key(key=key, bucket_name=bucket)

        return content  # type: ignore[no-any-return]
    elif location.startswith("gs://"):
        from airflow.providers.google.cloud.hooks.gcs import GCSHook

        if conn_id is None:
            hook = GCSHook()
        else:
            hook = GCSHook(gcp_conn_id=conn_id)

        bucket, blob = bucket_and_key(location)
        content = hook.download(bucket_name=bucket, object_name=blob)
        # The downloaded payload is decoded here, so it is treated as UTF-8 bytes.
        return content.decode("utf-8")  # type: ignore[no-any-return]

    elif location.startswith("wasb://"):
        from airflow.providers.microsoft.azure.hooks.wasb import WasbHook

        if conn_id is None:
            hook = WasbHook()
        else:
            hook = WasbHook(wasb_conn_id=conn_id)

        container, blob = bucket_and_key(location)
        content = hook.read_file(container_name=container, blob_name=blob)
        return content  # type: ignore[no-any-return]

    elif location.startswith(("http://", "https://")):
        from airflow.providers.http.hooks.http import HttpHook

        if conn_id is None:
            try:
                # Probe for the hook's default connection; use it when it exists.
                HttpHook.get_connection(conn_id=HttpHook.default_conn_name)
                hook = HttpHook(method="GET")
            except AirflowNotFoundException:
                # NOTE(review): an empty connection id presumably lets the hook run
                # without a stored connection, using the full URL as the endpoint.
                hook = HttpHook(method="GET", http_conn_id="")
        else:
            hook = HttpHook(method="GET", http_conn_id=conn_id)
        res = hook.run(endpoint=location)
        hook.check_response(res)
        return res.text  # type: ignore[no-any-return]

    else:
        # Local path: use ``path`` untouched so unusual file names keep working, and
        # read as UTF-8 explicitly instead of relying on the locale default encoding
        # (consistent with the explicit UTF-8 decode in the GCS branch).
        with open(path, encoding="utf-8") as f:
            content = f.read()
        return content  # type: ignore[no-any-return]


# Inline JavaScript that DbtDocsView.dbt_docs_index injects into the <head> of the
# dbt docs index.html, immediately before the iframeResizer content-window script.
# It sets window.iFrameResizer.heightCalculationMethod to a function returning the
# largest "bottom" edge (bounding rect plus bottom margin) among the overview,
# model and search page containers, each with a fixed pixel offset added.
# This is a runtime string: its content is emitted verbatim into the served page.
iframe_script = """
<script>
function getMaxElement(side, elements_query) {
var elements = document.querySelectorAll(elements_query)
var elementsLength = elements.length,
elVal = 0,
maxVal = 0,
Side = capitalizeFirstLetter(side),
timer = Date.now()

for (var i = 0; i < elementsLength; i++) {
elVal =
elements[i].getBoundingClientRect()[side] +
getComputedStyleWrapper('margin' + Side, elements[i])
if (elVal > maxVal) {
maxVal = elVal
}
}

timer = Date.now() - timer

chkEventThottle(timer)

return maxVal
}
var throttledTimer = 16
function chkEventThottle(timer) {
if (timer > throttledTimer / 2) {
throttledTimer = 2 * timer
}
}
function capitalizeFirstLetter(string) {
return string.charAt(0).toUpperCase() + string.slice(1)
}
function getComputedStyleWrapper(prop, el) {
var retVal = 0
el = el || document.body // Not testable in phantonJS

retVal = document.defaultView.getComputedStyle(el, null)
retVal = null === retVal ? 0 : retVal[prop]

return parseInt(retVal)
}
window.iFrameResizer = {
heightCalculationMethod: function getHeight() {
return Math.max(
// Overview page
getMaxElement('bottom', 'div.panel.panel-default') + 50,
// Model page
getMaxElement('bottom', 'section.section') + 75,
// Search page
getMaxElement('bottom', 'div.result-body') + 110
)
}
}
</script>
"""


class DbtDocsView(AirflowBaseView):
    """Airflow web view that serves dbt docs artifacts under the ``/cosmos`` route.

    The artifacts (``index.html``, ``catalog.json``, ``manifest.json``) are loaded
    with ``open_file`` from the directory named by the ``[cosmos] dbt_docs_dir``
    configuration option. Every endpoint requires read access to the website resource.
    """

    default_view = "dbt_docs"
    route_base = "/cosmos"
    template_folder = op.join(op.dirname(__file__), "templates")
    static_folder = op.join(op.dirname(__file__), "static")

    def create_blueprint(
        self, appbuilder: AppBuilder, endpoint: Optional[str] = None, static_folder: Optional[str] = None
    ) -> None:
        """Create the blueprint, always using this view's own ``static_folder``.

        The ``static_folder`` argument is deliberately ignored so that the static
        assets bundled next to this module are the ones that get served.
        """
        return super().create_blueprint(appbuilder, endpoint=endpoint, static_folder=self.static_folder)  # type: ignore[no-any-return]

    @expose("/dbt_docs")  # type: ignore[misc]
    @has_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_WEBSITE)])
    def dbt_docs(self) -> str:
        """Render the docs page, or a "not set up" page when no docs dir is configured."""
        try:
            conf.get("cosmos", "dbt_docs_dir")
        except AirflowConfigException:
            template_name = "dbt_docs_not_set_up.html"
        else:
            template_name = "dbt_docs.html"
        return self.render_template(template_name)  # type: ignore[no-any-return,no-untyped-call]

    @expose("/dbt_docs_index.html")  # type: ignore[misc]
    @has_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_WEBSITE)])
    def dbt_docs_index(self) -> str:
        """Serve the dbt docs ``index.html`` with the iframe-resizing scripts injected.

        Responds with 404 when the docs directory is not configured or the file
        cannot be found.
        """
        try:
            docs_root = conf.get("cosmos", "dbt_docs_dir")
            page = open_file(op.join(docs_root, "index.html"))
        except (FileNotFoundError, AirflowConfigException):
            abort(404)

        # Hack the dbt docs to render properly in an iframe: splice the sizing
        # script and the resizer library in front of the first closing head tag.
        resizer_src = url_for(".static", filename="iframeResizer.contentWindow.min.js")
        head_closing = f'{iframe_script}<script src="{resizer_src}"></script></head>'
        return page.replace("</head>", head_closing, 1)

    @expose("/catalog.json")  # type: ignore[misc]
    @has_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_WEBSITE)])
    def catalog(self) -> Tuple[str, int, Dict[str, Any]]:
        """Serve ``catalog.json`` as JSON; 404 if unconfigured or not found."""
        try:
            docs_root = conf.get("cosmos", "dbt_docs_dir")
            payload = open_file(op.join(docs_root, "catalog.json"))
        except (FileNotFoundError, AirflowConfigException):
            abort(404)

        json_headers = {"Content-Type": "application/json"}
        return payload, 200, json_headers

    @expose("/manifest.json")  # type: ignore[misc]
    @has_access([(permissions.ACTION_CAN_READ, permissions.RESOURCE_WEBSITE)])
    def manifest(self) -> Tuple[str, int, Dict[str, Any]]:
        """Serve ``manifest.json`` as JSON; 404 if unconfigured or not found."""
        try:
            docs_root = conf.get("cosmos", "dbt_docs_dir")
            payload = open_file(op.join(docs_root, "manifest.json"))
        except (FileNotFoundError, AirflowConfigException):
            abort(404)

        json_headers = {"Content-Type": "application/json"}
        return payload, 200, json_headers


# Module-level view instance; CosmosPlugin lists it in its appbuilder_views.
dbt_docs_view = DbtDocsView()


class CosmosPlugin(AirflowPlugin):
    """Airflow plugin that adds the dbt docs view as "dbt Docs" in the "Browse" category."""

    name = "cosmos"
    appbuilder_views = [
        {
            "name": "dbt Docs",
            "category": "Browse",
            "view": dbt_docs_view,
        },
    ]
Loading
Loading