Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test that DB schema matches the Alembic migrations #3027

Merged
merged 2 commits into from
Nov 9, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 39 additions & 12 deletions test/unit/io_managers_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Test Dagster IO Managers."""
import datetime
import json
from pathlib import Path

import alembic.config
import hypothesis
import pandas as pd
import pandera
Expand Down Expand Up @@ -204,40 +206,65 @@ def test_missing_schema_error(sqlite_io_manager_fixture):


@pytest.fixture
def pudl_sqlite_io_manager_fixture(tmp_path, test_pkg):
"""Create a SQLiteIOManager fixture with a PUDL database schema."""
db_path = tmp_path / "pudl.sqlite"
def fake_pudl_sqlite_io_manager_fixture(tmp_path, test_pkg, monkeypatch):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this fixture actually need `monkeypatch`?

"""Create a SQLiteIOManager fixture with a fake database schema."""
db_path = tmp_path / "fake.sqlite"

# Create the database and schemas
engine = sa.create_engine(f"sqlite:///{db_path}")
md = test_pkg.to_sql()
md.create_all(engine)
return PudlSQLiteIOManager(base_dir=tmp_path, db_name="pudl", package=test_pkg)
return PudlSQLiteIOManager(base_dir=tmp_path, db_name="fake", package=test_pkg)


def test_error_when_handling_view_without_metadata(pudl_sqlite_io_manager_fixture):
def test_migrations_match_metadata(tmp_path, monkeypatch):
"""If you create a `PudlSQLiteIOManager` that points at a non-existing
`pudl.sqlite` - it will initialize the DB based on the `package`.

If you create a `PudlSQLiteIOManager` that points at an existing
`pudl.sqlite`, like one initialized via `alembic upgrade head`, it
will compare the existing db schema with the db schema in `package`.

We want to make sure that the schema defined in `package` is the same as
the one we arrive at by applying all the migrations.
"""
# alembic wants current directory to be the one with `alembic.ini` in it
monkeypatch.chdir(Path(__file__).parent.parent.parent)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do you need to do this chdir?

# alembic knows to use PudlPaths().pudl_db - so we need to set PUDL_OUTPUT env var
monkeypatch.setenv("PUDL_OUTPUT", tmp_path)
# run all the migrations on a fresh DB at tmp_path/pudl.sqlite
alembic.config.main(["upgrade", "head"])

pkg = Package.from_resource_ids()
PudlSQLiteIOManager(base_dir=tmp_path, db_name="pudl", package=pkg)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this call is running `compare_metadata`, which is what actually does the checking? Why wasn't it doing that before in `pudl_sqlite_io_manager_fixture`?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or is it `alembic.config.main(["upgrade", "head"])` that is doing the checking? Or is it the combination of running `upgrade head` and then creating the IO manager?


# all we care about is that it didn't raise an error
assert True


def test_error_when_handling_view_without_metadata(fake_pudl_sqlite_io_manager_fixture):
"""Make sure an error is thrown when a user creates a view without metadata."""
asset_key = "track_view"
sql_stmt = "CREATE VIEW track_view AS SELECT * FROM track;"
output_context = build_output_context(asset_key=AssetKey(asset_key))
with pytest.raises(ValueError):
pudl_sqlite_io_manager_fixture.handle_output(output_context, sql_stmt)
fake_pudl_sqlite_io_manager_fixture.handle_output(output_context, sql_stmt)


@pytest.mark.skip(reason="SQLAlchemy is not finding the view. Debug or remove.")
def test_handling_view_with_metadata(pudl_sqlite_io_manager_fixture):
def test_handling_view_with_metadata(fake_pudl_sqlite_io_manager_fixture):
"""Make sure an users can create and load views when it has metadata."""
# Create some sample data
asset_key = "artist"
artist = pd.DataFrame({"artistid": [1], "artistname": ["Co-op Mop"]})
output_context = build_output_context(asset_key=AssetKey(asset_key))
pudl_sqlite_io_manager_fixture.handle_output(output_context, artist)
fake_pudl_sqlite_io_manager_fixture.handle_output(output_context, artist)

# create the view
asset_key = "artist_view"
sql_stmt = "CREATE VIEW artist_view AS SELECT * FROM artist;"
output_context = build_output_context(asset_key=AssetKey(asset_key))
pudl_sqlite_io_manager_fixture.handle_output(output_context, sql_stmt)
fake_pudl_sqlite_io_manager_fixture.handle_output(output_context, sql_stmt)

# read the view data as a dataframe
input_context = build_input_context(asset_key=AssetKey(asset_key))
Expand All @@ -246,15 +273,15 @@ def test_handling_view_with_metadata(pudl_sqlite_io_manager_fixture):
# sqlalchemy.exc.InvalidRequestError: Could not reflect: requested table(s) not available in
# Engine(sqlite:////private/var/folders/pg/zrqnq8l113q57bndc5__h2640000gn/
# # T/pytest-of-nelsonauner/pytest-38/test_handling_view_with_metada0/pudl.sqlite): (artist_view)
pudl_sqlite_io_manager_fixture.load_input(input_context)
fake_pudl_sqlite_io_manager_fixture.load_input(input_context)


def test_error_when_reading_view_without_metadata(pudl_sqlite_io_manager_fixture):
def test_error_when_reading_view_without_metadata(fake_pudl_sqlite_io_manager_fixture):
"""Make sure and error is thrown when a user loads a view without metadata."""
asset_key = "track_view"
input_context = build_input_context(asset_key=AssetKey(asset_key))
with pytest.raises(ValueError):
pudl_sqlite_io_manager_fixture.load_input(input_context)
fake_pudl_sqlite_io_manager_fixture.load_input(input_context)


def test_ferc_xbrl_sqlite_io_manager_dedupes(mocker, tmp_path):
Expand Down