Skip to content

Commit

Permalink
Refactor LoadMethod.LOCAL to use symlinks instead of copying directory (
Browse files Browse the repository at this point in the history
#660)

This PR refactors the `create_symlinks` function that was previously
used in load via dbt ls so that it can be used in
`DbtLocalBaseOperator.run_command` instead of copying the entire
directory.

Closes: #614
  • Loading branch information
jbandoro authored Nov 14, 2023
1 parent 0f16d15 commit 5d23758
Show file tree
Hide file tree
Showing 11 changed files with 46 additions and 41 deletions.
9 changes: 1 addition & 8 deletions cosmos/dbt/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
LoadMode,
)
from cosmos.dbt.parser.project import LegacyDbtProject
from cosmos.dbt.project import create_symlinks
from cosmos.dbt.selector import select_nodes
from cosmos.log import get_logger

Expand Down Expand Up @@ -51,14 +52,6 @@ class DbtNode:
has_test: bool = False


def create_symlinks(project_path: Path, tmp_dir: Path) -> None:
"""Helper function to create symlinks to the dbt project files."""
ignore_paths = (DBT_LOG_DIR_NAME, DBT_TARGET_DIR_NAME, "dbt_packages", "profiles.yml")
for child_name in os.listdir(project_path):
if child_name not in ignore_paths:
os.symlink(project_path / child_name, tmp_dir / child_name)


def run_command(command: list[str], tmp_dir: Path, env_vars: dict[str, str]) -> str:
"""Run a command in a subprocess, returning the stdout."""
logger.info("Running command: `%s`", " ".join(command))
Expand Down
14 changes: 14 additions & 0 deletions cosmos/dbt/project.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from pathlib import Path
import os
from cosmos.constants import (
DBT_LOG_DIR_NAME,
DBT_TARGET_DIR_NAME,
)


def create_symlinks(project_path: Path, tmp_dir: Path) -> None:
"""Helper function to create symlinks to the dbt project files."""
ignore_paths = (DBT_LOG_DIR_NAME, DBT_TARGET_DIR_NAME, "dbt_packages", "profiles.yml")
for child_name in os.listdir(project_path):
if child_name not in ignore_paths:
os.symlink(project_path / child_name, tmp_dir / child_name)
13 changes: 4 additions & 9 deletions cosmos/operators/local.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import os
import shutil
import signal
import tempfile
from attr import define
Expand Down Expand Up @@ -45,6 +44,7 @@
FullOutputSubprocessResult,
)
from cosmos.dbt.parser.output import extract_log_issues, parse_output
from cosmos.dbt.project import create_symlinks

DBT_NO_TESTS_MSG = "Nothing to do"
DBT_WARN_MSG = "WARN"
Expand Down Expand Up @@ -192,19 +192,14 @@ def run_command(
"""
Copies the dbt project to a temporary directory and runs the command.
"""
with tempfile.TemporaryDirectory() as tmp_dir:
with tempfile.TemporaryDirectory() as tmp_project_dir:
logger.info(
"Cloning project to writable temp directory %s from %s",
tmp_dir,
tmp_project_dir,
self.project_dir,
)

# need a subfolder because shutil.copytree will fail if the destination dir already exists
tmp_project_dir = os.path.join(tmp_dir, "dbt_project")
shutil.copytree(
self.project_dir,
tmp_project_dir,
)
create_symlinks(Path(self.project_dir), Path(tmp_project_dir))

with self.profile_config.ensure_profile() as profile_values:
(profile_path, env_vars) = profile_values
Expand Down
Binary file not shown.
2 changes: 1 addition & 1 deletion dev/dags/dbt/simple/models/movies_ratings_simplified.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ select
"Domestic",
"Foreign",
"Worldwide"
from {{ source('imdb', 'movies_ratings') }}
from {{ source('main', 'movies_ratings') }}
2 changes: 1 addition & 1 deletion dev/dags/dbt/simple/models/source.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
version: 2

sources:
- name: imdb
- name: main
description: Example of IMDB SQlite database
tables:
- name: movies_ratings
Expand Down
4 changes: 2 additions & 2 deletions dev/dags/dbt/simple/profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ simple:
database: 'database'
schema: 'main'
schemas_and_paths:
main: "{{ env_var('DBT_SQLITE_PATH', '.') }}/data/imdb.db"
schema_directory: 'data'
main: "{{ env_var('DBT_SQLITE_PATH') }}/imdb.db"
schema_directory: "{{ env_var('DBT_SQLITE_PATH') }}"
2 changes: 1 addition & 1 deletion dev/dags/example_cosmos_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
DEFAULT_DBT_ROOT_PATH = Path(__file__).parent / "dbt"
DBT_ROOT_PATH = Path(os.getenv("DBT_ROOT_PATH", DEFAULT_DBT_ROOT_PATH))

os.environ["DBT_SQLITE_PATH"] = str(DEFAULT_DBT_ROOT_PATH / "simple")
os.environ["DBT_SQLITE_PATH"] = str(DEFAULT_DBT_ROOT_PATH / "data")


profile_config = ProfileConfig(
Expand Down
14 changes: 1 addition & 13 deletions tests/dbt/test_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
DbtGraph,
DbtNode,
LoadMode,
create_symlinks,
run_command,
parse_dbt_ls_output,
)
Expand Down Expand Up @@ -388,7 +387,7 @@ def test_load_via_dbt_ls_with_sources(load_method):
)
getattr(dbt_graph, load_method)()
assert len(dbt_graph.nodes) == 4
assert "source.simple.imdb.movies_ratings" in dbt_graph.nodes
assert "source.simple.main.movies_ratings" in dbt_graph.nodes
assert "exposure.simple.weekly_metrics" in dbt_graph.nodes


Expand Down Expand Up @@ -661,17 +660,6 @@ def test_load_dbt_ls_and_manifest_with_model_version(load_method):
} == set(dbt_graph.nodes["model.jaffle_shop.orders"].depends_on)


def test_create_symlinks(tmp_path):
"""Tests that symlinks are created for expected files in the dbt project directory."""
tmp_dir = tmp_path / "dbt-project"
tmp_dir.mkdir()

create_symlinks(DBT_PROJECTS_ROOT_DIR / "jaffle_shop", tmp_dir)
for child in tmp_dir.iterdir():
assert child.is_symlink()
assert child.name not in ("logs", "target", "profiles.yml", "dbt_packages")


@pytest.mark.parametrize(
"stdout,returncode",
[
Expand Down
15 changes: 15 additions & 0 deletions tests/dbt/test_project.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from pathlib import Path
from cosmos.dbt.project import create_symlinks

DBT_PROJECTS_ROOT_DIR = Path(__file__).parent.parent.parent / "dev/dags/dbt"


def test_create_symlinks(tmp_path):
"""Tests that symlinks are created for expected files in the dbt project directory."""
tmp_dir = tmp_path / "dbt-project"
tmp_dir.mkdir()

create_symlinks(DBT_PROJECTS_ROOT_DIR / "jaffle_shop", tmp_dir)
for child in tmp_dir.iterdir():
assert child.is_symlink()
assert child.name not in ("logs", "target", "profiles.yml", "dbt_packages")
12 changes: 6 additions & 6 deletions tests/sample/manifest_source.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"model.simple.top_animations": [
"exposure.simple.weekly_metrics"
],
"source.simple.imdb.movies_ratings": [
"source.simple.main.movies_ratings": [
"model.simple.movies_ratings_simplified"
]
},
Expand Down Expand Up @@ -6992,7 +6992,7 @@
"depends_on": {
"macros": [],
"nodes": [
"source.simple.imdb.movies_ratings"
"source.simple.main.movies_ratings"
]
},
"description": "",
Expand Down Expand Up @@ -7110,16 +7110,16 @@
"model.simple.top_animations"
],
"model.simple.movies_ratings_simplified": [
"source.simple.imdb.movies_ratings"
"source.simple.main.movies_ratings"
],
"model.simple.top_animations": [
"model.simple.movies_ratings_simplified"
],
"source.simple.imdb.movies_ratings": []
"source.simple.main.movies_ratings": []
},
"selectors": {},
"sources": {
"source.simple.imdb.movies_ratings": {
"source.simple.main.movies_ratings": {
"columns": {},
"config": {
"enabled": true
Expand Down Expand Up @@ -7166,7 +7166,7 @@
"source_meta": {},
"source_name": "imdb",
"tags": [],
"unique_id": "source.simple.imdb.movies_ratings",
"unique_id": "source.simple.main.movies_ratings",
"unrendered_config": {}
}
}
Expand Down

0 comments on commit 5d23758

Please sign in to comment.