Skip to content

Commit

Permalink
WIP rebase to give a good message
Browse files (browse the repository at this point in the history)
  • Loading branch information
jdangerx committed Nov 7, 2023
1 parent f251def commit c19d573
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 17 deletions.
16 changes: 10 additions & 6 deletions src/pudl/extract/xbrl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import io
from datetime import date
from pathlib import Path
from urllib.parse import urlparse

from dagster import Field, Noneable, op
from ferc_xbrl_extractor.cli import run_main
Expand Down Expand Up @@ -85,12 +86,17 @@ def xbrl2sqlite(context) -> None:
logger.info(f"Dataset ferc{form}_xbrl is disabled, skipping")
continue

sql_path = Path(urlparse(PudlPaths().sqlite_db(f"ferc{form.value}_xbrl")).path)

if clobber:
sql_path.unlink(missing_ok=True)

convert_form(
settings,
form,
datastore,
output_path=output_path,
clobber=clobber,
sql_path=sql_path,
batch_size=batch_size,
workers=workers,
)
Expand All @@ -101,7 +107,7 @@ def convert_form(
form: XbrlFormNumber,
datastore: FercXbrlDatastore,
output_path: Path,
clobber: bool,
sql_path: Path,
batch_size: int | None = None,
workers: int | None = None,
) -> None:
Expand All @@ -128,10 +134,8 @@ def convert_form(

run_main(
instance_path=filings_archive,
sql_path=PudlPaths()
.sqlite_db(f"ferc{form.value}_xbrl")
.removeprefix("sqlite:///"), # Temp hacky solution
clobber=clobber,
sql_path=sql_path,
clobber=False, # if we set clobber=True, clobbers on *every* call to run_main
taxonomy=taxonomy_archive,
entry_point=taxonomy_entry_point,
form_number=form.value,
Expand Down
5 changes: 3 additions & 2 deletions src/pudl/io_managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,8 +426,9 @@ def __init__(

super().__init__(base_dir, db_name, md, timeout)

existing_schema_context = MigrationContext.configure(self.engine.connect())
metadata_diff = compare_metadata(existing_schema_context, self.md)
with self.engine.connect() as conn:
existing_schema_context = MigrationContext.configure(conn)
metadata_diff = compare_metadata(existing_schema_context, self.md)
if metadata_diff:
logger.info(f"Metadata diff:\n\n{metadata_diff}")
raise RuntimeError(
Expand Down
9 changes: 4 additions & 5 deletions src/pudl/workspace/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@ class Config:
@property
def input_dir(self) -> Path:
"""Path to PUDL input directory."""
return Path(self.pudl_input)
return Path(self.pudl_input).absolute()

@property
def output_dir(self) -> Path:
"""Path to PUDL output directory."""
return Path(self.pudl_output)
return Path(self.pudl_output).absolute()

@property
def settings_dir(self) -> Path:
Expand All @@ -83,9 +83,8 @@ def sqlite_db(self, name: str) -> str:
The name is expected to be the name of the database without the .sqlite
suffix. E.g. pudl, ferc1 and so on.
"""
db_path = PudlPaths().output_dir / f"{name}.sqlite"
return f"sqlite:///{db_path}"
return self.output_dir / f"{name}.sqlite"
db_path = self.output_dir / f"{name}.sqlite"
return f"sqlite://{db_path}"

def output_file(self, filename: str) -> Path:
"""Path to file in PUDL output directory."""
Expand Down
8 changes: 4 additions & 4 deletions test/unit/extract/xbrl_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,9 @@ def test_xbrl2sqlite(settings, forms, mocker):
form,
mock_datastore,
output_path=PudlPaths().output_dir,
sql_path=PudlPaths().output_dir / f"ferc{form.value}_xbrl.sqlite",
batch_size=20,
workers=10,
clobber=True,
)


Expand Down Expand Up @@ -158,7 +158,7 @@ def get_filings(self, year, form: XbrlFormNumber):
form,
FakeDatastore(),
output_path=output_path,
clobber=True,
sql_path=output_path / f"ferc{form.value}_xbrl.sqlite",
batch_size=10,
workers=5,
)
Expand All @@ -169,8 +169,8 @@ def get_filings(self, year, form: XbrlFormNumber):
expected_calls.append(
mocker.call(
instance_path=f"filings_{year}_{form.value}",
sql_path=str(output_path / f"ferc{form.value}_xbrl.sqlite"),
clobber=True,
sql_path=output_path / f"ferc{form.value}_xbrl.sqlite",
clobber=False,
taxonomy=f"raw_archive_{year}_{form.value}",
entry_point=f"taxonomy_entry_point_{year}_{form.value}",
form_number=form.value,
Expand Down

0 comments on commit c19d573

Please sign in to comment.