Skip to content

Commit

Permalink
introduce reversed-manifest tool
Browse files Browse the repository at this point in the history
Signed-off-by: Ching Yi, Chan <[email protected]>
  • Loading branch information
qrtt1 committed Aug 16, 2023
1 parent 6a9f036 commit abaedc4
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 1 deletion.
37 changes: 37 additions & 0 deletions piperider_cli/dbt/reverse_manifest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from typing import Dict


def build_columns(column_details: Dict):
    """Build placeholder column entries from a manifest node's columns.

    Args:
        column_details: Mapping of column name to that column's metadata
            dict. ``None`` or an empty mapping yields an empty result.

    Returns:
        A dict keyed by column name. Each value carries the column ``name``,
        its ``description`` (``None`` when absent) and the fixed placeholder
        values ``type='other'`` and ``schema_type='other'``.
    """
    if not column_details:
        return {}

    return {
        name: dict(
            name=name,
            # Tolerate a column whose metadata entry is missing (None).
            description=(detail or {}).get('description'),
            type='other',
            schema_type='other',
        )
        for name, detail in column_details.items()
    }


def reverse_to_run(manifest_data: Dict):
    """Build a minimal run document from a dbt manifest alone.

    The result embeds the manifest unchanged under ``dbt.manifest`` together
    with an empty ``run_results``, and lists one table entry per qualifying
    resource with placeholder column information (see ``build_columns``).

    Args:
        manifest_data: The parsed dbt manifest as a dict.

    Returns:
        A dict with the keys ``id``, ``tables``, ``created_at``,
        ``datasource`` and ``dbt``. ``id``, ``created_at`` and ``datasource``
        are fixed placeholder values.
    """
    from piperider_cli.dbt.list_task import list_resources_data_from_manifest, load_manifest

    # A manifest without a "nodes" section simply produces no tables instead
    # of failing on the membership test below.
    nodes = manifest_data.get('nodes') or {}
    tables = dict()

    for resource in list_resources_data_from_manifest(load_manifest(manifest_data)):
        unique_id = resource.get('unique_id')
        if unique_id not in nodes:
            continue

        # NOTE(review): 'source' is accepted here, but only ids present under
        # the manifest's "nodes" pass the check above -- confirm whether
        # sources are expected to appear there.
        if resource.get('resource_type') not in ('model', 'seed', 'source'):
            continue

        table_name = resource.get('name')
        # A node without a "columns" entry is treated as having no columns.
        column_details: Dict[str, Dict] = nodes[unique_id].get('columns') or {}
        tables[table_name] = dict(
            name=table_name,
            col_count=len(column_details),
            columns=build_columns(column_details),
            ref_id=unique_id,
        )

    return dict(
        id="tmp-run",
        tables=tables,
        created_at="2023-08-14T03:46:51.629668Z",
        datasource=dict(name='empty', type='duckdb'),
        dbt=dict(manifest=manifest_data, run_results={}),
    )
2 changes: 1 addition & 1 deletion static_report/src/utils/dbt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ export const buildDbtNodes = (run?: SaferSRSchema) => {
);
}

if (runResults) {
if (runResults && runResults.results) {
runResults.results.forEach((result) => {
const uniqueId = result.unique_id;
if (dbtNodes[uniqueId]) {
Expand Down
37 changes: 37 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import json
import os

import pytest


def load_mock_dbt_data(file_name):
    """Load a JSON file from the ``mock_dbt_data`` directory beside this module.

    Args:
        file_name: Name of the file inside ``mock_dbt_data``.

    Returns:
        The parsed JSON content.
    """
    file_path = os.path.join(os.path.dirname(__file__), 'mock_dbt_data', file_name)
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)


@pytest.fixture()
def sc_31711_run_data():
    """Provide the parsed content of the sc-31711 mock data file."""
    fixture_file = 'sc-31711-run-dbt-1.5.4-no-profiled.json'
    return load_mock_dbt_data(fixture_file)


@pytest.fixture()
def piperider_schema_validate_func():
    """Provide a callable that validates data against ``schema.json``.

    The schema is read once from ``schema.json`` in the directory of the
    ``piperider_cli.profiler`` package.

    Returns:
        A function ``func(data)`` that returns ``None`` when ``data``
        validates and raises ``AssertionError`` otherwise.
    """
    import piperider_cli.profiler as p
    from jsonschema import validate
    from jsonschema.exceptions import ValidationError

    schema_def = os.path.join(os.path.dirname(p.__file__), 'schema.json')
    with open(schema_def, encoding='utf-8') as fh:
        schema = json.load(fh)

    def func(data):
        try:
            validate(instance=data, schema=schema)
        except ValidationError as e:
            # Raise explicitly rather than `assert False`: the failure then
            # carries the validation details and still fires under `-O`.
            raise AssertionError(
                f"error: {e.message}\npath: {e.json_path}"
            ) from e

    return func

Large diffs are not rendered by default.

27 changes: 27 additions & 0 deletions tests/test_reversed_manifest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import json
from typing import Callable, Dict

import pytest

from piperider_cli.dbt import dbt_version
from piperider_cli.dbt.reverse_manifest import reverse_to_run


@pytest.mark.skipif(
    dbt_version < "v1.5", reason="skip manifest test before dbt-core 1.5"
)
def test_reversed_manifest(
    sc_31711_run_data: Dict, piperider_schema_validate_func: Callable[[Dict], None]
):
    """Check that a run rebuilt from a manifest validates and keeps the manifest."""
    run_data: Dict = sc_31711_run_data
    manifest_data = run_data.get("dbt", {}).get("manifest")

    fake_run = reverse_to_run(manifest_data)
    piperider_schema_validate_func(fake_run)

    assert manifest_data == fake_run.get("dbt", {}).get("manifest")

    # Confirm the result is JSON-serializable in memory; the test must not
    # leave a file behind in the current working directory.
    json.dumps(fake_run)

    # it is impossible to generate the same tables information without profiling
    # assert fake_run.get('tables') == run_data.get('tables')

0 comments on commit abaedc4

Please sign in to comment.