From 7164f923b10fac23d1376dcdca2c622d7ca6976c Mon Sep 17 00:00:00 2001 From: Hussain Jafari Date: Fri, 18 Oct 2024 14:15:32 -0700 Subject: [PATCH] remove node value and nesteddict types (#467) Category: refactor JIRA issue: MIC-5359 Remove references to NodeValue and NestedDict types from layered config tree. Testing mypy passes. --- docs/nitpick-exceptions | 4 ---- src/pseudopeople/configuration/generator.py | 13 ++++++------- src/pseudopeople/interface.py | 19 +++++++++---------- tests/integration/conftest.py | 18 +++++++++--------- tests/unit/test_column_noise.py | 1 - tests/unit/test_configuration.py | 4 ++-- 6 files changed, 26 insertions(+), 33 deletions(-) diff --git a/docs/nitpick-exceptions b/docs/nitpick-exceptions index 2e4220c4..12ddc5cf 100644 --- a/docs/nitpick-exceptions +++ b/docs/nitpick-exceptions @@ -10,7 +10,3 @@ py:class pandas.core.generic.PandasObject # pseudopeople py:exc ConfigurationError py:exc DataSourceError - -# layered_config_tree -py:class NestedDict -py:class NestedDictValue \ No newline at end of file diff --git a/src/pseudopeople/configuration/generator.py b/src/pseudopeople/configuration/generator.py index 0e7d68bd..a05e5df7 100644 --- a/src/pseudopeople/configuration/generator.py +++ b/src/pseudopeople/configuration/generator.py @@ -1,10 +1,9 @@ from collections.abc import Sequence from pathlib import Path -from typing import Dict, Optional, Union +from typing import Any, Optional, Union import yaml from layered_config_tree import LayeredConfigTree -from layered_config_tree.types import NestedDict from pseudopeople.configuration import NO_NOISE, Keys from pseudopeople.configuration.noise_configuration import NoiseConfiguration @@ -84,7 +83,7 @@ def get_configuration( - overrides: Optional[Union[Path, str, NestedDict]] = None, + overrides: Optional[Union[Path, str, dict[str, Any]]] = None, dataset_schema: Optional[DatasetSchema] = None, filters: Sequence[DataFilter] = (), ) -> NoiseConfiguration: @@ -185,7 +184,7 @@ def get_noise_type_dict(noise_type: NoiseType, is_no_noise: bool) -> dict[str, f def add_overrides( noising_configuration: LayeredConfigTree, - overrides: Dict, + overrides: dict, dataset_schema: Optional[DatasetSchema] = None, filters: Sequence[DataFilter] = (), ) -> None: @@ -199,7 +198,7 @@ def add_overrides( validate_noise_level_proportions(noising_configuration, dataset_schema, filters) -def _format_overrides(default_config: LayeredConfigTree, user_dict: Dict) -> Dict: +def _format_overrides(default_config: LayeredConfigTree, user_dict: dict) -> dict: """Formats the user's configuration file as necessary, so it can properly update noising configuration to be used """ @@ -208,8 +207,8 @@ def _format_overrides(default_config: LayeredConfigTree, user_dict: Dict) -> Dic def _format_misreport_age_perturbations( - default_config: LayeredConfigTree, user_dict: Dict -) -> Dict: + default_config: LayeredConfigTree, user_dict: dict +) -> dict: # Format any age perturbation lists as a dictionary with uniform probabilities for dataset_schema in user_dict: user_perturbations = ( diff --git a/src/pseudopeople/interface.py b/src/pseudopeople/interface.py index 68df02f2..0457905b 100644 --- a/src/pseudopeople/interface.py +++ b/src/pseudopeople/interface.py @@ -1,9 +1,8 @@ from collections.abc import Sequence from pathlib import Path -from typing import Literal, Optional, Union, cast +from typing import Any, Literal, Optional, Union, cast import pandas as pd -from layered_config_tree.types import NestedDict from loguru import logger from packaging.version import Version, parse from tqdm import tqdm @@ -29,7 +28,7 @@ def _generate_dataset( dataset_schema: DatasetSchema, source: Optional[Union[Path, str]], seed: int, - config: Optional[Union[Path, str, NestedDict]], + config: Optional[Union[Path, str, dict[str, Any]]], filters: Sequence[DataFilter], verbose: bool = False, engine_name: Literal["pandas", "dask"] = "pandas", @@ -205,7 +204,7 @@ def _get_data_changelog_version(changelog: Path) -> Version: def generate_decennial_census( source: Optional[Union[Path, str]] = None, seed: int = 0, - config: Optional[Union[Path, str, NestedDict]] = None, + config: Optional[Union[Path, str, dict[str, Any]]] = None, year: Optional[int] = 2020, state: Optional[str] = None, verbose: bool = False, @@ -303,7 +302,7 @@ def generate_decennial_census( def generate_american_community_survey( source: Optional[Union[Path, str]] = None, seed: int = 0, - config: Optional[Union[Path, str, NestedDict]] = None, + config: Optional[Union[Path, str, dict[str, Any]]] = None, year: Optional[int] = 2020, state: Optional[str] = None, verbose: bool = False, @@ -416,7 +415,7 @@ def generate_american_community_survey( def generate_current_population_survey( source: Optional[Union[Path, str]] = None, seed: int = 0, - config: Optional[Union[Path, str, NestedDict]] = None, + config: Optional[Union[Path, str, dict[str, Any]]] = None, year: Optional[int] = 2020, state: Optional[str] = None, verbose: bool = False, @@ -530,7 +529,7 @@ def generate_current_population_survey( def generate_taxes_w2_and_1099( source: Optional[Union[Path, str]] = None, seed: int = 0, - config: Optional[Union[Path, str, NestedDict]] = None, + config: Optional[Union[Path, str, dict[str, Any]]] = None, year: Optional[int] = 2020, state: Optional[str] = None, verbose: bool = False, @@ -628,7 +627,7 @@ def generate_taxes_w2_and_1099( def generate_women_infants_and_children( source: Optional[Union[Path, str]] = None, seed: int = 0, - config: Optional[Union[Path, str, NestedDict]] = None, + config: Optional[Union[Path, str, dict[str, Any]]] = None, year: Optional[int] = 2020, state: Optional[str] = None, verbose: bool = False, @@ -731,7 +730,7 @@ def generate_women_infants_and_children( def generate_social_security( source: Optional[Union[Path, str]] = None, seed: int = 0, - config: Optional[Union[Path, str, NestedDict]] = None, + config: Optional[Union[Path, str, dict[str, Any]]] = None, year: Optional[int] = 2020, verbose: bool = False, engine: Literal["pandas", "dask"] = "pandas", @@ -819,7 +818,7 @@ def generate_social_security( def generate_taxes_1040( source: Optional[Union[Path, str]] = None, seed: int = 0, - config: Optional[Union[Path, str, NestedDict]] = None, + config: Optional[Union[Path, str, dict[str, Any]]] = None, year: Optional[int] = 2020, state: Optional[str] = None, verbose: bool = False, diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index cdf3c0c7..7bc01369 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,9 +1,9 @@ from pathlib import Path +from typing import Any import pandas as pd import pytest from _pytest.legacypath import TempdirFactory -from layered_config_tree.types import NestedDict from pseudopeople.configuration import Keys, get_configuration from pseudopeople.constants import paths @@ -120,7 +120,7 @@ def split_sample_data_dir_state_edit( @pytest.fixture(scope="module") -def config() -> NestedDict: +def config() -> dict[str, Any]: """Returns a custom configuration dict to be used in noising""" config = get_configuration().to_dict() # default config @@ -167,37 +167,37 @@ def config() -> NestedDict: # Noised sample datasets @pytest.fixture(scope="module") -def noised_sample_data_decennial_census(config: NestedDict) -> pd.DataFrame: +def noised_sample_data_decennial_census(config: dict[str, Any]) -> pd.DataFrame: return generate_decennial_census(seed=SEED, year=None, config=config) @pytest.fixture(scope="module") -def noised_sample_data_american_community_survey(config: NestedDict) -> pd.DataFrame: +def noised_sample_data_american_community_survey(config: dict[str, Any]) -> pd.DataFrame: return generate_american_community_survey(seed=SEED, year=None, config=config) @pytest.fixture(scope="module") -def noised_sample_data_current_population_survey(config: NestedDict) -> pd.DataFrame: +def noised_sample_data_current_population_survey(config: dict[str, Any]) -> pd.DataFrame: return generate_current_population_survey(seed=SEED, year=None, config=config) @pytest.fixture(scope="module") -def noised_sample_data_women_infants_and_children(config: NestedDict) -> pd.DataFrame: +def noised_sample_data_women_infants_and_children(config: dict[str, Any]) -> pd.DataFrame: return generate_women_infants_and_children(seed=SEED, year=None, config=config) @pytest.fixture(scope="module") -def noised_sample_data_social_security(config: NestedDict) -> pd.DataFrame: +def noised_sample_data_social_security(config: dict[str, Any]) -> pd.DataFrame: return generate_social_security(seed=SEED, year=None, config=config) @pytest.fixture(scope="module") -def noised_sample_data_taxes_w2_and_1099(config: NestedDict) -> pd.DataFrame: +def noised_sample_data_taxes_w2_and_1099(config: dict[str, Any]) -> pd.DataFrame: return generate_taxes_w2_and_1099(seed=SEED, year=None, config=config) @pytest.fixture(scope="module") -def noised_sample_data_taxes_1040(config: NestedDict) -> pd.DataFrame: +def noised_sample_data_taxes_1040(config: dict[str, Any]) -> pd.DataFrame: return generate_taxes_1040(seed=SEED, year=None, config=config) diff --git a/tests/unit/test_column_noise.py b/tests/unit/test_column_noise.py index fa39f82d..f358c0c9 100644 --- a/tests/unit/test_column_noise.py +++ b/tests/unit/test_column_noise.py @@ -9,7 +9,6 @@ import pandas as pd import pytest from layered_config_tree import LayeredConfigTree -from layered_config_tree.types import NestedDict, NodeValue from pseudopeople.configuration import Keys, get_configuration from pseudopeople.configuration.noise_configuration import NoiseConfiguration diff --git a/tests/unit/test_configuration.py b/tests/unit/test_configuration.py index 86215a85..8877e075 100644 --- a/tests/unit/test_configuration.py +++ b/tests/unit/test_configuration.py @@ -153,7 +153,7 @@ def test_get_configuration_with_user_override(mocker: MockerFixture) -> None: }, } } - _ = get_configuration(config) # type: ignore [arg-type] + _ = get_configuration(config) mock.assert_called_once_with(layers=["baseline", "default", "user"]) update_calls = [ call @@ -228,7 +228,7 @@ def test_format_miswrite_ages( Keys.COLUMN_NOISE: { COLUMNS.age.name: { NOISE_TYPES.misreport_age.name: { - Keys.POSSIBLE_AGE_DIFFERENCES: age_differences, # type: ignore [dict-item] + Keys.POSSIBLE_AGE_DIFFERENCES: age_differences, }, }, },