Skip to content

Commit

Permalink
Remove NodeValue and NestedDict types (#467)
Browse files Browse the repository at this point in the history
Category: refactor
JIRA issue: MIC-5359

Remove references to the NodeValue and NestedDict types imported from layered_config_tree.types, replacing the affected annotations with the built-in dict[str, Any].

Testing
mypy passes.
  • Loading branch information
hussain-jafari authored and rmudambi committed Nov 22, 2024
1 parent f6b0fc6 commit 7164f92
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 33 deletions.
4 changes: 0 additions & 4 deletions docs/nitpick-exceptions
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,3 @@ py:class pandas.core.generic.PandasObject
# pseudopeople
py:exc ConfigurationError
py:exc DataSourceError

# layered_config_tree
py:class NestedDict
py:class NestedDictValue
13 changes: 6 additions & 7 deletions src/pseudopeople/configuration/generator.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from collections.abc import Sequence
from pathlib import Path
from typing import Dict, Optional, Union
from typing import Any, Optional, Union

import yaml
from layered_config_tree import LayeredConfigTree
from layered_config_tree.types import NestedDict

from pseudopeople.configuration import NO_NOISE, Keys
from pseudopeople.configuration.noise_configuration import NoiseConfiguration
Expand Down Expand Up @@ -84,7 +83,7 @@


def get_configuration(
overrides: Optional[Union[Path, str, NestedDict]] = None,
overrides: Optional[Union[Path, str, dict[str, Any]]] = None,
dataset_schema: Optional[DatasetSchema] = None,
filters: Sequence[DataFilter] = (),
) -> NoiseConfiguration:
Expand Down Expand Up @@ -185,7 +184,7 @@ def get_noise_type_dict(noise_type: NoiseType, is_no_noise: bool) -> dict[str, f

def add_overrides(
noising_configuration: LayeredConfigTree,
overrides: Dict,
overrides: dict,
dataset_schema: Optional[DatasetSchema] = None,
filters: Sequence[DataFilter] = (),
) -> None:
Expand All @@ -199,7 +198,7 @@ def add_overrides(
validate_noise_level_proportions(noising_configuration, dataset_schema, filters)


def _format_overrides(default_config: LayeredConfigTree, user_dict: Dict) -> Dict:
def _format_overrides(default_config: LayeredConfigTree, user_dict: dict) -> dict:
"""Formats the user's configuration file as necessary, so it can properly
update noising configuration to be used
"""
Expand All @@ -208,8 +207,8 @@ def _format_overrides(default_config: LayeredConfigTree, user_dict: Dict) -> Dic


def _format_misreport_age_perturbations(
default_config: LayeredConfigTree, user_dict: Dict
) -> Dict:
default_config: LayeredConfigTree, user_dict: dict
) -> dict:
# Format any age perturbation lists as a dictionary with uniform probabilities
for dataset_schema in user_dict:
user_perturbations = (
Expand Down
19 changes: 9 additions & 10 deletions src/pseudopeople/interface.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from collections.abc import Sequence
from pathlib import Path
from typing import Literal, Optional, Union, cast
from typing import Any, Literal, Optional, Union, cast

import pandas as pd
from layered_config_tree.types import NestedDict
from loguru import logger
from packaging.version import Version, parse
from tqdm import tqdm
Expand All @@ -29,7 +28,7 @@ def _generate_dataset(
dataset_schema: DatasetSchema,
source: Optional[Union[Path, str]],
seed: int,
config: Optional[Union[Path, str, NestedDict]],
config: Optional[Union[Path, str, dict[str, Any]]],
filters: Sequence[DataFilter],
verbose: bool = False,
engine_name: Literal["pandas", "dask"] = "pandas",
Expand Down Expand Up @@ -205,7 +204,7 @@ def _get_data_changelog_version(changelog: Path) -> Version:
def generate_decennial_census(
source: Optional[Union[Path, str]] = None,
seed: int = 0,
config: Optional[Union[Path, str, NestedDict]] = None,
config: Optional[Union[Path, str, dict[str, Any]]] = None,
year: Optional[int] = 2020,
state: Optional[str] = None,
verbose: bool = False,
Expand Down Expand Up @@ -303,7 +302,7 @@ def generate_decennial_census(
def generate_american_community_survey(
source: Optional[Union[Path, str]] = None,
seed: int = 0,
config: Optional[Union[Path, str, NestedDict]] = None,
config: Optional[Union[Path, str, dict[str, Any]]] = None,
year: Optional[int] = 2020,
state: Optional[str] = None,
verbose: bool = False,
Expand Down Expand Up @@ -416,7 +415,7 @@ def generate_american_community_survey(
def generate_current_population_survey(
source: Optional[Union[Path, str]] = None,
seed: int = 0,
config: Optional[Union[Path, str, NestedDict]] = None,
config: Optional[Union[Path, str, dict[str, Any]]] = None,
year: Optional[int] = 2020,
state: Optional[str] = None,
verbose: bool = False,
Expand Down Expand Up @@ -530,7 +529,7 @@ def generate_current_population_survey(
def generate_taxes_w2_and_1099(
source: Optional[Union[Path, str]] = None,
seed: int = 0,
config: Optional[Union[Path, str, NestedDict]] = None,
config: Optional[Union[Path, str, dict[str, Any]]] = None,
year: Optional[int] = 2020,
state: Optional[str] = None,
verbose: bool = False,
Expand Down Expand Up @@ -628,7 +627,7 @@ def generate_taxes_w2_and_1099(
def generate_women_infants_and_children(
source: Optional[Union[Path, str]] = None,
seed: int = 0,
config: Optional[Union[Path, str, NestedDict]] = None,
config: Optional[Union[Path, str, dict[str, Any]]] = None,
year: Optional[int] = 2020,
state: Optional[str] = None,
verbose: bool = False,
Expand Down Expand Up @@ -731,7 +730,7 @@ def generate_women_infants_and_children(
def generate_social_security(
source: Optional[Union[Path, str]] = None,
seed: int = 0,
config: Optional[Union[Path, str, NestedDict]] = None,
config: Optional[Union[Path, str, dict[str, Any]]] = None,
year: Optional[int] = 2020,
verbose: bool = False,
engine: Literal["pandas", "dask"] = "pandas",
Expand Down Expand Up @@ -819,7 +818,7 @@ def generate_social_security(
def generate_taxes_1040(
source: Optional[Union[Path, str]] = None,
seed: int = 0,
config: Optional[Union[Path, str, NestedDict]] = None,
config: Optional[Union[Path, str, dict[str, Any]]] = None,
year: Optional[int] = 2020,
state: Optional[str] = None,
verbose: bool = False,
Expand Down
18 changes: 9 additions & 9 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from pathlib import Path
from typing import Any

import pandas as pd
import pytest
from _pytest.legacypath import TempdirFactory
from layered_config_tree.types import NestedDict

from pseudopeople.configuration import Keys, get_configuration
from pseudopeople.constants import paths
Expand Down Expand Up @@ -120,7 +120,7 @@ def split_sample_data_dir_state_edit(


@pytest.fixture(scope="module")
def config() -> NestedDict:
def config() -> dict[str, Any]:
"""Returns a custom configuration dict to be used in noising"""
config = get_configuration().to_dict() # default config

Expand Down Expand Up @@ -167,37 +167,37 @@ def config() -> NestedDict:

# Noised sample datasets
@pytest.fixture(scope="module")
def noised_sample_data_decennial_census(config: NestedDict) -> pd.DataFrame:
def noised_sample_data_decennial_census(config: dict[str, Any]) -> pd.DataFrame:
return generate_decennial_census(seed=SEED, year=None, config=config)


@pytest.fixture(scope="module")
def noised_sample_data_american_community_survey(config: NestedDict) -> pd.DataFrame:
def noised_sample_data_american_community_survey(config: dict[str, Any]) -> pd.DataFrame:
return generate_american_community_survey(seed=SEED, year=None, config=config)


@pytest.fixture(scope="module")
def noised_sample_data_current_population_survey(config: NestedDict) -> pd.DataFrame:
def noised_sample_data_current_population_survey(config: dict[str, Any]) -> pd.DataFrame:
return generate_current_population_survey(seed=SEED, year=None, config=config)


@pytest.fixture(scope="module")
def noised_sample_data_women_infants_and_children(config: NestedDict) -> pd.DataFrame:
def noised_sample_data_women_infants_and_children(config: dict[str, Any]) -> pd.DataFrame:
return generate_women_infants_and_children(seed=SEED, year=None, config=config)


@pytest.fixture(scope="module")
def noised_sample_data_social_security(config: NestedDict) -> pd.DataFrame:
def noised_sample_data_social_security(config: dict[str, Any]) -> pd.DataFrame:
return generate_social_security(seed=SEED, year=None, config=config)


@pytest.fixture(scope="module")
def noised_sample_data_taxes_w2_and_1099(config: NestedDict) -> pd.DataFrame:
def noised_sample_data_taxes_w2_and_1099(config: dict[str, Any]) -> pd.DataFrame:
return generate_taxes_w2_and_1099(seed=SEED, year=None, config=config)


@pytest.fixture(scope="module")
def noised_sample_data_taxes_1040(config: NestedDict) -> pd.DataFrame:
def noised_sample_data_taxes_1040(config: dict[str, Any]) -> pd.DataFrame:
return generate_taxes_1040(seed=SEED, year=None, config=config)


Expand Down
1 change: 0 additions & 1 deletion tests/unit/test_column_noise.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import pandas as pd
import pytest
from layered_config_tree import LayeredConfigTree
from layered_config_tree.types import NestedDict, NodeValue

from pseudopeople.configuration import Keys, get_configuration
from pseudopeople.configuration.noise_configuration import NoiseConfiguration
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def test_get_configuration_with_user_override(mocker: MockerFixture) -> None:
},
}
}
_ = get_configuration(config) # type: ignore [arg-type]
_ = get_configuration(config)
mock.assert_called_once_with(layers=["baseline", "default", "user"])
update_calls = [
call
Expand Down Expand Up @@ -228,7 +228,7 @@ def test_format_miswrite_ages(
Keys.COLUMN_NOISE: {
COLUMNS.age.name: {
NOISE_TYPES.misreport_age.name: {
Keys.POSSIBLE_AGE_DIFFERENCES: age_differences, # type: ignore [dict-item]
Keys.POSSIBLE_AGE_DIFFERENCES: age_differences,
},
},
},
Expand Down

0 comments on commit 7164f92

Please sign in to comment.