diff --git a/src/pseudopeople/configuration/generator.py b/src/pseudopeople/configuration/generator.py index 7420abb0..dde53121 100644 --- a/src/pseudopeople/configuration/generator.py +++ b/src/pseudopeople/configuration/generator.py @@ -226,7 +226,14 @@ def _format_misreport_age_perturbations( if not user_perturbations: continue formatted = {} - default_perturbations: dict[int, float] = default_config.get_tree(dataset_schema).get_tree(Keys.COLUMN_NOISE).get_tree("age").get_tree(NOISE_TYPES.misreport_age.name).get(Keys.POSSIBLE_AGE_DIFFERENCES).to_dict() + default_perturbations: dict[int, float] = ( + default_config.get_tree(dataset_schema) + .get_tree(Keys.COLUMN_NOISE) + .get_tree("age") + .get_tree(NOISE_TYPES.misreport_age.name) + .get(Keys.POSSIBLE_AGE_DIFFERENCES) + .to_dict() + ) # Replace default configuration with 0 probabilities for perturbation in default_perturbations: formatted[perturbation] = 0.0 diff --git a/src/pseudopeople/configuration/noise_configuration.py b/src/pseudopeople/configuration/noise_configuration.py index 590a1d6d..4a83be2a 100644 --- a/src/pseudopeople/configuration/noise_configuration.py +++ b/src/pseudopeople/configuration/noise_configuration.py @@ -25,7 +25,7 @@ def __init__(self, config: LayeredConfigTree): def to_dict(self) -> dict: # TODO: remove ignore when dropping support for Python 3.9 - config_dict: dict = self._config.to_dict() # type: ignore [assignment] + config_dict: dict = self._config.to_dict() # type: ignore [assignment] return config_dict def get_value( @@ -81,7 +81,7 @@ def get_value( noise_value: int | float | LayeredConfigTree = parameter_tree.get(parameter_name) converted_noise_value: int | float | dict = ( # not sure how to tell mypy the types in this dict - noise_value.to_dict() # type: ignore [assignment] + noise_value.to_dict() # type: ignore [assignment] if isinstance(noise_value, LayeredConfigTree) else noise_value ) diff --git a/src/pseudopeople/configuration/validator.py b/src/pseudopeople/configuration/validator.py index 294ffabb..e3fddea3 100644 --- a/src/pseudopeople/configuration/validator.py +++ b/src/pseudopeople/configuration/validator.py @@ -47,8 +47,12 @@ def validate_overrides(overrides: Any, default_config: LayeredConfigTree) -> Non default_dataset_config, key, "configuration key", dataset_name ) - default_row_noise_config: LayeredConfigTree = default_dataset_config.get_tree(Keys.ROW_NOISE) - default_column_noise_config: LayeredConfigTree = default_dataset_config.get_tree(Keys.COLUMN_NOISE) + default_row_noise_config: LayeredConfigTree = default_dataset_config.get_tree( + Keys.ROW_NOISE + ) + default_column_noise_config: LayeredConfigTree = default_dataset_config.get_tree( + Keys.COLUMN_NOISE + ) row_noise_config = dataset_config.get(Keys.ROW_NOISE, {}) if not isinstance(row_noise_config, dict): @@ -346,9 +350,12 @@ def validate_noise_level_proportions( # Note: Using pd.isnull here and above because np.isnan does not work on strings if NOISE_TYPES.duplicate_with_guardian in dataset_schema.row_noise_types: # Config level for guardian duplication group - config_noise_level = configuration_tree.get_tree(row["dataset"]).get_tree(Keys.ROW_NOISE).get_tree( - NOISE_TYPES.duplicate_with_guardian.name - ).get(row["noise_type"]) + config_noise_level = ( + configuration_tree.get_tree(row["dataset"]) + .get_tree(Keys.ROW_NOISE) + .get_tree(NOISE_TYPES.duplicate_with_guardian.name) + .get(row["noise_type"]) + ) entity_type = Keys.ROW_NOISE else: # I have preloaded the metadata for ACS and CPS to have the duplicate with @@ -356,9 +363,13 @@ def validate_noise_level_proportions( continue else: # Config level for each column noise type - config_noise_level = configuration_tree.get_tree(row["dataset"]).get_tree(Keys.COLUMN_NOISE).get_tree( - row["column"] - ).get_tree(row["noise_type"]).get(Keys.CELL_PROBABILITY) + config_noise_level = ( + configuration_tree.get_tree(row["dataset"]) + .get_tree(Keys.COLUMN_NOISE) + .get_tree(row["column"]) + .get_tree(row["noise_type"]) + .get(Keys.CELL_PROBABILITY) + ) entity_type = Keys.COLUMN_NOISE max_noise_level = row["proportion"] if config_noise_level > max_noise_level: