diff --git a/.github/workflows/dev-pytest.yml b/.github/workflows/dev-pytest.yml index e325e82..8859fda 100644 --- a/.github/workflows/dev-pytest.yml +++ b/.github/workflows/dev-pytest.yml @@ -72,4 +72,4 @@ jobs: echo "GITHUB WORKSPACE: ${GITHUB_WORKSPACE}" chmod +x testSchemachange.sh bash testSchemachange.sh - working-directory: . \ No newline at end of file + working-directory: . diff --git a/populateConnection.sh b/populateConnection.sh index 292039b..0e75860 100644 --- a/populateConnection.sh +++ b/populateConnection.sh @@ -9,4 +9,4 @@ echo warehouse = \"${SNOWFLAKE_WAREHOUSE}\" >> ./connections.toml echo database = \"${SNOWFLAKE_DATABASE}\" >> ./connections.toml echo password = \"${SNOWFLAKE_PASSWORD}\" >> ./connections.toml echo "cat connections.toml" -cat ./connections.toml \ No newline at end of file +cat ./connections.toml diff --git a/schemachange/config/ChangeHistoryTable.py b/schemachange/config/ChangeHistoryTable.py index 7491911..2119423 100644 --- a/schemachange/config/ChangeHistoryTable.py +++ b/schemachange/config/ChangeHistoryTable.py @@ -17,7 +17,7 @@ class ChangeHistoryTable: @property def fully_qualified(self) -> str: return f"{self.database_name}.{self.schema_name}.{self.table_name}" - + @property def fully_qualified_schema_name(self) -> str: return f"{self.database_name}.{self.schema_name}" diff --git a/schemachange/config/DeployConfig.py b/schemachange/config/DeployConfig.py index b42fc52..561273b 100644 --- a/schemachange/config/DeployConfig.py +++ b/schemachange/config/DeployConfig.py @@ -8,9 +8,9 @@ from schemachange.config.ChangeHistoryTable import ChangeHistoryTable from schemachange.config.utils import ( get_snowflake_identifier_string, - get_snowflake_password, ) + @dataclasses.dataclass(frozen=True) class DeployConfig(BaseConfig): subcommand: Literal["deploy"] = "deploy" @@ -84,12 +84,12 @@ def get_session_kwargs(self) -> dict: "connection_name": self.connection_name, "change_history_table": self.change_history_table, "autocommit": self.autocommit, - "query_tag": self.query_tag + "query_tag": self.query_tag, } - - # TODO: Discuss the need for check for snowflake password before passing the session + + # TODO: Discuss the need for check for snowflake password before passing the session # kwargs to open a snowflake session # snowflake_password = get_snowflake_password() # if snowflake_password is not None and snowflake_password: - # session_kwargs["password"] = snowflake_password + # session_kwargs["password"] = snowflake_password return {k: v for k, v in session_kwargs.items() if v is not None} diff --git a/schemachange/config/get_merged_config.py b/schemachange/config/get_merged_config.py index 604f1f4..f1a2ad5 100644 --- a/schemachange/config/get_merged_config.py +++ b/schemachange/config/get_merged_config.py @@ -11,7 +11,7 @@ from schemachange.config.utils import ( load_yaml_config, validate_directory, - validate_file_path + validate_file_path, ) diff --git a/schemachange/redact_config_secrets.py b/schemachange/redact_config_secrets.py index cc8e4e6..43b8107 100644 --- a/schemachange/redact_config_secrets.py +++ b/schemachange/redact_config_secrets.py @@ -2,7 +2,7 @@ import copy import warnings -from typing import Callable +from typing import Callable, Any import structlog from structlog import PrintLogger @@ -14,39 +14,41 @@ def get_redact_config_secrets_processor( def redact_config_secrets_processor( _: PrintLogger, __: str, event_dict: dict ) -> dict: - def redact_dict(level: int, sub_event_dict: dict) -> dict: + def redact_value(level: int, value: Any): if level > 6: warnings.warn( "Unable to redact deeply nested secrets in log: %(event)s" % {"event": event_dict["event"]} ) - return sub_event_dict - for sub_k, sub_v in sub_event_dict.items(): - if isinstance(sub_v, dict): - sub_event_dict[sub_k] = redact_dict( - level=level + 1, sub_event_dict=sub_v - ) - elif isinstance(sub_v, str): - for secret in config_secrets: - if secret in sub_v: - sub_event_dict[sub_k] = sub_event_dict[sub_k].replace( - secret, "*" * len(secret) - ) - elif isinstance(sub_v, int): - for secret in config_secrets: - if secret in str(sub_v): - sub_event_dict[sub_k] = str(sub_event_dict[sub_k]).replace( - secret, "*" * len(secret) - ) - else: + return value + if isinstance(value, dict): + for sub_k, sub_v in value.items(): + value[sub_k] = redact_value(level=level + 1, value=sub_v) + return value + elif isinstance(value, list): + for i, sub_v in enumerate(value): + value[i] = redact_value(level=level + 1, value=sub_v) + return value + elif isinstance(value, set): + return {redact_value(level=level + 1, value=sub_v) for sub_v in value} + elif isinstance(value, tuple): + return tuple( + redact_value(level=level + 1, value=sub_v) for sub_v in value + ) + elif not isinstance(value, str): + try: + value = str(value) + except Exception: warnings.warn( "Unable to redact %(type)s log arguments in log: %(event)s" - % {"type": type(sub_v).__name__, "event": event_dict["event"]} + % {"type": type(value).__name__, "event": event_dict["event"]} ) - return sub_event_dict - return sub_event_dict + return value + for secret in config_secrets: + value = value.replace(secret, "*" * len(secret)) + return value - return redact_dict(level=0, sub_event_dict=copy.deepcopy(event_dict)) + return redact_value(level=0, value=copy.deepcopy(event_dict)) return redact_config_secrets_processor diff --git a/tests/test_redact_config_secrets.py b/tests/test_redact_config_secrets.py index 2801f0a..65081d2 100644 --- a/tests/test_redact_config_secrets.py +++ b/tests/test_redact_config_secrets.py @@ -26,35 +26,704 @@ def test_processor_config_with_populated_config_secrets(self): assert len(prev_cfg["processors"]) == len(new_cfg["processors"]) +# "secrets, extra_kwargs, expected" +cases: list = [ + pytest.param( + {}, + {}, + {}, + id="No secrets, No payload", + ), + pytest.param( + {}, + {"key_1": None}, + {"key_1": "None"}, + id="No secrets, None payload unchanged", + ), + pytest.param( + {}, + {"key_1": "secret"}, + {"key_1": "secret"}, + id="No secrets, string payload unchanged", + ), + pytest.param( + {}, + {"key_1": b"secret"}, + {"key_1": str(b"secret")}, + id="No secrets, bytes payload unchanged", + ), + pytest.param( + {}, + {"key_1": bytearray(5)}, + {"key_1": str(bytearray(5))}, + id="No secrets, bytearray payload unchanged", + ), + pytest.param( + {}, + {"key_1": 3}, + {"key_1": str(3)}, + id="No secrets, int payload unchanged", + ), + pytest.param( + {}, + {"key_1": 3.14}, + {"key_1": str(3.14)}, + id="No secrets, float payload unchanged", + ), + pytest.param( + {}, + {"key_1": 3j}, + {"key_1": str(3j)}, + id="No secrets, complex payload unchanged", + ), + pytest.param( + {}, + {"key_1": True}, + {"key_1": str(True)}, + id="No secrets, boolean payload unchanged", + ), + pytest.param( + {}, + {"key_1": range(6)}, + {"key_1": str(range(6))}, + id="No secrets, range payload unchanged", + ), + pytest.param( + {}, + { + "key_1": [ + None, + "secret", + b"secret", + bytearray(5), + 3, + 3.14, + 3j, + True, + range(6), + ] + }, + { + "key_1": [ + str(None), + "secret", + str(b"secret"), + str(bytearray(5)), + str(3), + str(3.14), + str(3j), + str(True), + str(range(6)), + ] + }, + id="No secrets, list payload unchanged", + ), + pytest.param( + {}, + { + "key_1": ( + None, + "secret", + b"secret", + bytearray(5), + 3, + 3.14, + 3j, + True, + range(6), + ) + }, + { + "key_1": ( + str(None), + "secret", + str(b"secret"), + str(bytearray(5)), + str(3), + str(3.14), + str(3j), + str(True), + str(range(6)), + ) + }, + id="No secrets, tuple payload unchanged", + ), + pytest.param( + {}, + { + "key_1": { + None, + "secret", + b"secret", + 3, + 3.14, + 3j, + True, + range(6), + } + }, + { + "key_1": { + str(None), + "secret", + str(b"secret"), + str(3), + str(3.14), + str(3j), + str(True), + str(range(6)), + } + }, + id="No secrets, set payload unchanged", + ), + pytest.param( + {}, + { + "key_1": { + "key_2a": None, + "key_2b": "secret", + "key_2c": b"secret", + "key_2d": bytearray(5), + "key_2e": 3, + "key_2f": 3.14, + "key_2g": 3j, + "key_2h": True, + "key_2i": range(6), + "key_2j": [ + None, + "secret", + b"secret", + bytearray(5), + 3, + 3.14, + 3j, + True, + range(6), + ], + "key_2k": ( + None, + "secret", + b"secret", + bytearray(5), + 3, + 3.14, + 3j, + True, + range(6), + ), + "key_2l": { + None, + "secret", + b"secret", + 3, + 3.14, + 3j, + True, + range(6), + }, + } + }, + { + "key_1": { + "key_2a": str(None), + "key_2b": "secret", + "key_2c": str(b"secret"), + "key_2d": str(bytearray(5)), + "key_2e": str(3), + "key_2f": str(3.14), + "key_2g": str(3j), + "key_2h": str(True), + "key_2i": str(range(6)), + "key_2j": [ + str(None), + "secret", + str(b"secret"), + str(bytearray(5)), + str(3), + str(3.14), + str(3j), + str(True), + str(range(6)), + ], + "key_2k": ( + str(None), + "secret", + str(b"secret"), + str(bytearray(5)), + str(3), + str(3.14), + str(3j), + str(True), + str(range(6)), + ), + "key_2l": { + str(None), + "secret", + str(b"secret"), + str(3), + str(3.14), + str(3j), + str(True), + str(range(6)), + }, + } + }, + id="No secrets, dict payload unchanged", + ), + pytest.param( + {}, + { + "key_1": { + "key_2": { + "key_3a": None, + "key_3b": "secret", + "key_3c": b"secret", + "key_3d": bytearray(5), + "key_3e": 3, + "key_3f": 3.14, + "key_3g": 3j, + "key_3h": True, + "key_3i": range(6), + "key_3j": [ + None, + "secret", + b"secret", + bytearray(5), + 3, + 3.14, + 3j, + True, + range(6), + ], + "key_3k": ( + None, + "secret", + b"secret", + bytearray(5), + 3, + 3.14, + 3j, + True, + range(6), + ), + "key_3l": { + None, + "secret", + b"secret", + 3, + 3.14, + 3j, + True, + range(6), + }, + } + } + }, + { + "key_1": { + "key_2": { + "key_3a": str(None), + "key_3b": "secret", + "key_3c": str(b"secret"), + "key_3d": str(bytearray(5)), + "key_3e": str(3), + "key_3f": str(3.14), + "key_3g": str(3j), + "key_3h": str(True), + "key_3i": str(range(6)), + "key_3j": [ + str(None), + "secret", + str(b"secret"), + str(bytearray(5)), + str(3), + str(3.14), + str(3j), + str(True), + str(range(6)), + ], + "key_3k": ( + str(None), + "secret", + str(b"secret"), + str(bytearray(5)), + str(3), + str(3.14), + str(3j), + str(True), + str(range(6)), + ), + "key_3l": { + str(None), + "secret", + str(b"secret"), + str(3), + str(3.14), + str(3j), + str(True), + str(range(6)), + }, + } + } + }, + id="No secrets, nested x 1 dict payload unchanged", + ), + pytest.param( + {"secret", "3.14"}, + {}, + {}, + id="Secrets, No payload", + ), + pytest.param( + {"secret", "3.14"}, + {"key_1": None}, + {"key_1": "None"}, + id="Secrets, None payload masked", + ), + pytest.param( + {"secret", "3.14"}, + {"key_1": "secret"}, + {"key_1": "******"}, + id="Secrets, string payload masked", + ), + pytest.param( + {"secret", "3.14"}, + {"key_1": b"secret"}, + {"key_1": str(b"******")}, + id="Secrets, bytes payload masked", + ), + pytest.param( + {"secret", "3.14"}, + {"key_1": bytearray(5)}, + {"key_1": str(bytearray(5))}, + id="Secrets, bytearray payload masked", + ), + pytest.param( + {"secret", "3.14"}, + {"key_1": 3}, + {"key_1": str(3)}, + id="Secrets, int payload masked", + ), + pytest.param( + {"secret", "3.14"}, + {"key_1": 3.14}, + {"key_1": "****"}, + id="Secrets, float payload masked", + ), + pytest.param( + {"secret", "3.14"}, + {"key_1": 3j}, + {"key_1": str(3j)}, + id="Secrets, complex payload masked", + ), + pytest.param( + {"secret", "3.14"}, + {"key_1": True}, + {"key_1": str(True)}, + id="Secrets, boolean payload masked", + ), + pytest.param( + {"secret", "3.14"}, + {"key_1": range(6)}, + {"key_1": str(range(6))}, + id="Secrets, range payload masked", + ), + pytest.param( + {"secret", "3.14"}, + { + "key_1": [ + None, + "secret", + b"secret", + bytearray(5), + 3, + 3.14, + 3j, + True, + range(6), + ] + }, + { + "key_1": [ + str(None), + "******", + str(b"******"), + str(bytearray(5)), + str(3), + "****", + str(3j), + str(True), + str(range(6)), + ] + }, + id="Secrets, list payload masked", + ), + pytest.param( + {"secret", "3.14"}, + { + "key_1": ( + None, + "secret", + b"secret", + bytearray(5), + 3, + 3.14, + 3j, + True, + range(6), + ) + }, + { + "key_1": ( + str(None), + "******", + str(b"******"), + str(bytearray(5)), + str(3), + "****", + str(3j), + str(True), + str(range(6)), + ) + }, + id="Secrets, tuple payload masked", + ), + pytest.param( + {"secret", "3.14"}, + { + "key_1": { + None, + "secret", + b"secret", + 3, + 3.14, + 3j, + True, + range(6), + } + }, + { + "key_1": { + str(None), + "******", + str(b"******"), + str(3), + "****", + str(3j), + str(True), + str(range(6)), + } + }, + id="Secrets, set payload masked", + ), + pytest.param( + {"secret", "3.14"}, + { + "key_1": { + "key_2a": None, + "key_2b": "secret", + "key_2c": b"secret", + "key_2d": bytearray(5), + "key_2e": 3, + "key_2f": 3.14, + "key_2g": 3j, + "key_2h": True, + "key_2i": range(6), + "key_2j": [ + None, + "secret", + b"secret", + bytearray(5), + 3, + 3.14, + 3j, + True, + range(6), + ], + "key_2k": ( + None, + "secret", + b"secret", + bytearray(5), + 3, + 3.14, + 3j, + True, + range(6), + ), + "key_2l": { + None, + "secret", + b"secret", + 3, + 3.14, + 3j, + True, + range(6), + }, + } + }, + { + "key_1": { + "key_2a": str(None), + "key_2b": "******", + "key_2c": str(b"******"), + "key_2d": str(bytearray(5)), + "key_2e": str(3), + "key_2f": "****", + "key_2g": str(3j), + "key_2h": str(True), + "key_2i": str(range(6)), + "key_2j": [ + str(None), + "******", + str(b"******"), + str(bytearray(5)), + str(3), + "****", + str(3j), + str(True), + str(range(6)), + ], + "key_2k": ( + str(None), + "******", + str(b"******"), + str(bytearray(5)), + str(3), + "****", + str(3j), + str(True), + str(range(6)), + ), + "key_2l": { + str(None), + "******", + str(b"******"), + str(3), + "****", + str(3j), + str(True), + str(range(6)), + }, + } + }, + id="Secrets, dict payload masked", + ), + pytest.param( + {"secret", "3.14"}, + { + "key_1": { + "key_2": { + "key_3a": None, + "key_3b": "secret", + "key_3c": b"secret", + "key_3d": bytearray(5), + "key_3e": 3, + "key_3f": 3.14, + "key_3g": 3j, + "key_3h": True, + "key_3i": range(6), + "key_3j": [ + None, + "secret", + b"secret", + bytearray(5), + 3, + 3.14, + 3j, + True, + range(6), + ], + "key_3k": ( + None, + "secret", + b"secret", + bytearray(5), + 3, + 3.14, + 3j, + True, + range(6), + ), + "key_3l": { + None, + "secret", + b"secret", + 3, + 3.14, + 3j, + True, + range(6), + }, + } + } + }, + { + "key_1": { + "key_2": { + "key_3a": str(None), + "key_3b": "******", + "key_3c": str(b"******"), + "key_3d": str(bytearray(5)), + "key_3e": str(3), + "key_3f": "****", + "key_3g": str(3j), + "key_3h": str(True), + "key_3i": str(range(6)), + "key_3j": [ + str(None), + "******", + str(b"******"), + str(bytearray(5)), + str(3), + "****", + str(3j), + str(True), + str(range(6)), + ], + "key_3k": ( + str(None), + "******", + str(b"******"), + str(bytearray(5)), + str(3), + "****", + str(3j), + str(True), + str(range(6)), + ), + "key_3l": { + str(None), + "******", + str(b"******"), + str(3), + "****", + str(3j), + str(True), + str(range(6)), + }, + } + } + }, + id="Secrets, nested x 1 dict payload masked", + ), + pytest.param( + {"secret", "12345"}, + {"key_1": {"key_2": {"key_3a": "secret", "key_3b": 12345}}}, + {"key_1": {"key_2": {"key_3a": "******", "key_3b": "*****"}}}, + id="Secrets, nested x 2 dict payload masked", + ), +] + + +class NoStr: + def __str__(self): + raise NotImplementedError() + + class TestGetRedactConfigSecretsProcessor: - @pytest.mark.parametrize( - "secrets, extra_kwargs, expected", - [ - ({}, {"keyword": "secret"}, {"keyword": "secret"}), - ({}, {}, {}), - ({"secret"}, {"keyword": "secret"}, {"keyword": "******"}), - ( - {"secret"}, - {"keyword": {"keyword": "secret"}}, - {"keyword": {"keyword": "******"}}, - ), - ( - {"secret"}, - {"keyword": {"keyword": {"keyword": "secret"}}}, - {"keyword": {"keyword": {"keyword": "******"}}}, - ), - ( - {"secret"}, - {"keyword": {"keyword": {"keyword": "secret"}}}, - {"keyword": {"keyword": {"keyword": "******"}}}, - ), - ( - {"12345"}, - {"keyword": {"keyword": {"keyword": 12345}}}, - {"keyword": {"keyword": {"keyword": "*****"}}}, - ), - ], - ) + @pytest.mark.parametrize("secrets, extra_kwargs, expected", cases) def test_happy_path(self, secrets: set[str], extra_kwargs: dict, expected: dict): redact_config_secrets_processor = get_redact_config_secrets_processor( config_secrets=secrets @@ -72,13 +741,11 @@ def test_happy_path(self, secrets: set[str], extra_kwargs: dict, expected: dict) [ ( { - "keyword": { - "keyword": { - "keyword": { - "keyword": { - "keyword": { - "keyword": {"keyword": {"keyword": "secret"}} - } + "key_1": { + "key_1": { + "key_1": { + "key_1": { + "key_1": {"key_1": {"key_1": {"key_1": "secret"}}} } } } @@ -87,8 +754,8 @@ def test_happy_path(self, secrets: set[str], extra_kwargs: dict, expected: dict) "Unable to redact deeply nested secrets in log", ), ( - {"keyword": object()}, - "Unable to redact object log arguments in log", + {"key_1": NoStr()}, + "Unable to redact NoStr log arguments in log", ), ], )