Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

409 imputation validation #49

Merged
merged 3 commits into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions mbs_results/validate_imputation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import pandas as pd


def validate_imputation(df: pd.DataFrame, target: str) -> None:
    """
    Validation for the imputation, including:
        - no missing values in target column

    Parameters
    ----------
    df : pd.DataFrame
        data with imputed values
    target : str
        name of column containing target variable

    Raises
    ------
    ValueError
        If the target column contains at least one missing value.
    """
    missing = df[target].isna()
    if missing.any():
        # Build the message from plain string pieces so it carries no stray
        # newlines or indentation from the source layout.
        raise ValueError(
            "Target column should have no missing values following imputation: "
            f"{int(missing.sum())} missing value(s) found in column {target}"
        )
4 changes: 4 additions & 0 deletions tests/data/validate_imputation/target_missing_values.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
no_missing,one_missing,all_missing
11,14,,
12,15,,
13,,
8 changes: 8 additions & 0 deletions tests/helper_functions.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from contextlib import contextmanager
from pathlib import Path

import pandas as pd
Expand All @@ -22,3 +23,10 @@ def load_filter(filter_path):
df["date"] = pd.to_datetime(df["date"], format="%Y%m")

return df


# Once the minimum supported Python version is >=3.7, this helper can be
# replaced by `from contextlib import nullcontext as does_not_raise`.
@contextmanager
def does_not_raise():
    """Context manager that does nothing on entry or exit.

    Intended as the "no exception expected" counterpart to
    ``pytest.raises(...)`` so both outcomes can be passed to a test as a
    context manager. Yields ``None``; any exception raised inside the
    ``with`` body propagates unchanged.
    """
    yield
36 changes: 36 additions & 0 deletions tests/test_validate_imputation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from pathlib import Path

import pandas as pd
import pytest
from helper_functions import does_not_raise

from mbs_results.validate_imputation import validate_imputation


@pytest.fixture(scope="class")
def filepath():
    """Return the directory holding the data files for these tests.

    The path is resolved relative to this test module rather than the
    current working directory, so the tests do not depend on where pytest
    is invoked from.
    """
    return Path(__file__).resolve().parent / "data" / "validate_imputation"


@pytest.fixture(scope="class")
def missing_target_values_data(filepath):
    """Load ``target_missing_values.csv`` from the test data directory.

    ``index_col=False`` tells pandas not to use the first column as the
    index, which it would otherwise do when data rows carry more fields
    than the header (e.g. trailing delimiters).
    """
    csv_path = filepath / "target_missing_values.csv"
    return pd.read_csv(csv_path, index_col=False)


class TestValidateImputation:
    """Tests for ``validate_imputation``."""

    @pytest.mark.parametrize(
        "target_column_name,expectation",
        [
            ("no_missing", does_not_raise()),
            ("one_missing", pytest.raises(ValueError)),
            ("all_missing", pytest.raises(ValueError)),
        ],
    )
    def test_target_missing_values_validation(
        self,
        missing_target_values_data,
        target_column_name,
        expectation,
    ):
        """Check each target column against its expected outcome.

        ``expectation`` is a context manager: ``does_not_raise()`` for the
        complete column and ``pytest.raises(ValueError)`` for the columns
        that contain missing values.
        """
        with expectation:
            validate_imputation(missing_target_values_data, target_column_name)
Loading