From 77e87be5fe8bb81c72916d56623ce78b0ba81a8f Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Thu, 14 Nov 2024 11:33:12 -0800 Subject: [PATCH] Adding namespace support for Snowflake (#5486) --- CHANGELOG.md | 3 + .../connection_secrets_bigquery.py | 14 +- .../connection_secrets_snowflake.py | 12 +- .../snowflake_namespace_meta.py | 17 ++ .../api/service/connectors/query_config.py | 32 ++- .../api/service/connectors/sql_connector.py | 6 +- tests/fixtures/snowflake_fixtures.py | 68 +++++ .../test_connection_template_endpoints.py | 40 ++- .../service/connectors/test_queryconfig.py | 5 +- .../connectors/test_snowflake_connector.py | 76 ++++++ .../connectors/test_snowflake_query_config.py | 193 ++++++++++++++ .../test_request_runner_service.py | 125 +-------- .../test_snowflake_privacy_requests.py | 243 ++++++++++++++++++ 13 files changed, 669 insertions(+), 165 deletions(-) create mode 100644 src/fides/api/schemas/namespace_meta/snowflake_namespace_meta.py create mode 100644 tests/ops/service/connectors/test_snowflake_connector.py create mode 100644 tests/ops/service/connectors/test_snowflake_query_config.py create mode 100644 tests/ops/service/privacy_request/test_snowflake_privacy_requests.py diff --git a/CHANGELOG.md b/CHANGELOG.md index cd072ea5cf4..3ba0f331748 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,9 @@ The types of changes are: ## [Unreleased](https://github.com/ethyca/fidesplus/compare/2.49.1...main) +### Added +- Added namespace support for Snowflake [#5486](https://github.com/ethyca/fides/pull/5486) + ### Fixed - Fixed deletion of ConnectionConfigs that have related MonitorConfigs [#5478](https://github.com/ethyca/fides/pull/5478) diff --git a/src/fides/api/schemas/connection_configuration/connection_secrets_bigquery.py b/src/fides/api/schemas/connection_configuration/connection_secrets_bigquery.py index 7a64e94668f..4b195781888 100644 --- a/src/fides/api/schemas/connection_configuration/connection_secrets_bigquery.py +++ b/src/fides/api/schemas/connection_configuration/connection_secrets_bigquery.py @@ -16,19 +16,19 @@ class KeyfileCreds(BaseModel): type: Optional[str] = None project_id: str = Field(title="Project ID") - private_key_id: Optional[str] = Field(default=None, title="Private Key ID") + private_key_id: Optional[str] = Field(default=None, title="Private key ID") private_key: Optional[str] = Field( - default=None, json_schema_extra={"sensitive": True} + default=None, title="Private key", json_schema_extra={"sensitive": True} ) - client_email: Optional[EmailStr] = None + client_email: Optional[EmailStr] = Field(None, title="Client email") client_id: Optional[str] = Field(default=None, title="Client ID") auth_uri: Optional[str] = Field(default=None, title="Auth URI") token_uri: Optional[str] = Field(default=None, title="Token URI") auth_provider_x509_cert_url: Optional[str] = Field( - default=None, title="Auth Provider X509 Cert URL" + default=None, title="Auth provider X509 cert URL" ) client_x509_cert_url: Optional[str] = Field( - default=None, title="Client X509 Cert URL" + default=None, title="Client X509 cert URL" ) @@ -42,8 +42,8 @@ class BigQuerySchema(ConnectionConfigSecretsSchema): ) dataset: Optional[str] = Field( default=None, - title="Default dataset", - description="The default BigQuery dataset that will be used if one isn't provided in the associated Fides datasets.", + title="Dataset", + description="Only provide a dataset to scope discovery monitors and privacy request automation to a specific BigQuery dataset. 
In most cases, this can be left blank.", ) _required_components: ClassVar[List[str]] = ["keyfile_creds"] diff --git a/src/fides/api/schemas/connection_configuration/connection_secrets_snowflake.py b/src/fides/api/schemas/connection_configuration/connection_secrets_snowflake.py index b08fa01367f..4b22ef2539a 100644 --- a/src/fides/api/schemas/connection_configuration/connection_secrets_snowflake.py +++ b/src/fides/api/schemas/connection_configuration/connection_secrets_snowflake.py @@ -49,13 +49,15 @@ class SnowflakeSchema(ConnectionConfigSecretsSchema): title="Warehouse", description="The name of the Snowflake warehouse where your queries will be executed.", ) - database_name: str = Field( + database_name: Optional[str] = Field( + default=None, title="Database", - description="The name of the Snowflake database you want to connect to.", + description="Only provide a database name to scope discovery monitors and privacy request automation to a specific database. In most cases, this can be left blank.", ) - schema_name: str = Field( + schema_name: Optional[str] = Field( + default=None, title="Schema", - description="The name of the Snowflake schema within the selected database.", + description="Only provide a schema to scope discovery monitors and privacy request automation to a specific schema. In most cases, this can be left blank.", ) role_name: Optional[str] = Field( title="Role", @@ -67,8 +69,6 @@ class SnowflakeSchema(ConnectionConfigSecretsSchema): "account_identifier", "user_login_name", "warehouse_name", - "database_name", - "schema_name", ] @model_validator(mode="after") diff --git a/src/fides/api/schemas/namespace_meta/snowflake_namespace_meta.py b/src/fides/api/schemas/namespace_meta/snowflake_namespace_meta.py new file mode 100644 index 00000000000..538c9bd0f02 --- /dev/null +++ b/src/fides/api/schemas/namespace_meta/snowflake_namespace_meta.py @@ -0,0 +1,17 @@ +from typing import Literal + +from fides.api.schemas.namespace_meta.namespace_meta import NamespaceMeta + + +class SnowflakeNamespaceMeta(NamespaceMeta): + """ + Represents the namespace structure for Snowflake queries. + + Attributes: + database_name (str): Name of the specific Snowflake database. + schema (str): The schema within the database. 
+ """ + + connection_type: Literal["snowflake"] = "snowflake" + database_name: str + schema: str # type: ignore[assignment] diff --git a/src/fides/api/service/connectors/query_config.py b/src/fides/api/service/connectors/query_config.py index 4b0758e69f2..bd30736c400 100644 --- a/src/fides/api/service/connectors/query_config.py +++ b/src/fides/api/service/connectors/query_config.py @@ -28,6 +28,9 @@ BigQueryNamespaceMeta, ) from fides.api.schemas.namespace_meta.namespace_meta import NamespaceMeta +from fides.api.schemas.namespace_meta.snowflake_namespace_meta import ( + SnowflakeNamespaceMeta, +) from fides.api.schemas.policy import ActionType from fides.api.service.masking.strategy.masking_strategy import MaskingStrategy from fides.api.service.masking.strategy.masking_strategy_nullify import ( @@ -775,6 +778,8 @@ class MicrosoftSQLServerQueryConfig(QueryStringWithoutTuplesOverrideQueryConfig) class SnowflakeQueryConfig(SQLQueryConfig): """Generates SQL in Snowflake's custom dialect.""" + namespace_meta_schema = SnowflakeNamespaceMeta + def generate_raw_query( self, field_list: List[str], filters: Dict[str, List[Any]] ) -> Optional[TextClause]: @@ -791,13 +796,34 @@ def format_clause_for_query( """Returns field names in clauses surrounded by quotation marks as required by Snowflake syntax.""" return f'"{string_path}" {operator} (:{operand})' + def _generate_table_name(self) -> str: + """ + Prepends the dataset name and schema to the base table name + if the Snowflake namespace meta is provided. + """ + + table_name = ( + f'"{self.node.collection.name}"' # Always quote the base table name + ) + + if not self.namespace_meta: + return table_name + + snowflake_meta = cast(SnowflakeNamespaceMeta, self.namespace_meta) + qualified_name = f'"{snowflake_meta.schema}".{table_name}' + + if database_name := snowflake_meta.database_name: + return f'"{database_name}".{qualified_name}' + + return qualified_name + def get_formatted_query_string( self, field_list: str, clauses: List[str], ) -> str: """Returns a query string with double quotation mark formatting as required by Snowflake syntax.""" - return f'SELECT {field_list} FROM "{self.node.collection.name}" WHERE ({" OR ".join(clauses)})' + return f'SELECT {field_list} FROM {self._generate_table_name()} WHERE ({" OR ".join(clauses)})' def format_key_map_for_update_stmt(self, fields: List[str]) -> List[str]: """Adds the appropriate formatting for update statements in this datastore.""" @@ -809,8 +835,8 @@ def get_update_stmt( update_clauses: List[str], pk_clauses: List[str], ) -> str: - """Returns a parameterised update statement in Snowflake dialect.""" - return f'UPDATE "{self.node.address.collection}" SET {",".join(update_clauses)} WHERE {" AND ".join(pk_clauses)}' + """Returns a parameterized update statement in Snowflake dialect.""" + return f'UPDATE {self._generate_table_name()} SET {", ".join(update_clauses)} WHERE {" AND ".join(pk_clauses)}' class RedshiftQueryConfig(SQLQueryConfig): diff --git a/src/fides/api/service/connectors/sql_connector.py b/src/fides/api/service/connectors/sql_connector.py index e609950a1a1..8a22cb0106e 100644 --- a/src/fides/api/service/connectors/sql_connector.py +++ b/src/fides/api/service/connectors/sql_connector.py @@ -725,7 +725,11 @@ def get_connect_args(self) -> Dict[str, Any]: def query_config(self, node: ExecutionNode) -> SQLQueryConfig: """Query wrapper corresponding to the input execution_node.""" - return SnowflakeQueryConfig(node) + + db: Session = Session.object_session(self.configuration) + return 
SnowflakeQueryConfig( + node, SQLConnector.get_namespace_meta(db, node.address.dataset) + ) class MicrosoftSQLServerConnector(SQLConnector): diff --git a/tests/fixtures/snowflake_fixtures.py b/tests/fixtures/snowflake_fixtures.py index 5b5c685f1e8..479ce1c81a0 100644 --- a/tests/fixtures/snowflake_fixtures.py +++ b/tests/fixtures/snowflake_fixtures.py @@ -94,6 +94,45 @@ def snowflake_connection_config( connection_config.delete(db) +@pytest.fixture(scope="function") +def snowflake_connection_config_without_default_dataset_or_schema( + db: Session, + integration_config: Dict[str, str], + snowflake_connection_config_without_secrets: ConnectionConfig, +) -> Generator: + """ + Returns a Snowflake ConectionConfig with secrets attached if secrets are present + in the configuration. + """ + connection_config = snowflake_connection_config_without_secrets + + account_identifier = integration_config.get("snowflake", {}).get( + "account_identifier" + ) or os.environ.get("SNOWFLAKE_TEST_ACCOUNT_IDENTIFIER") + user_login_name = integration_config.get("snowflake", {}).get( + "user_login_name" + ) or os.environ.get("SNOWFLAKE_TEST_USER_LOGIN_NAME") + password = integration_config.get("snowflake", {}).get( + "password" + ) or os.environ.get("SNOWFLAKE_TEST_PASSWORD") + warehouse_name = integration_config.get("snowflake", {}).get( + "warehouse_name" + ) or os.environ.get("SNOWFLAKE_TEST_WAREHOUSE_NAME") + + if all([account_identifier, user_login_name, password, warehouse_name]): + schema = SnowflakeSchema( + account_identifier=account_identifier, + user_login_name=user_login_name, + password=password, + warehouse_name=warehouse_name, + ) + connection_config.secrets = schema.model_dump(mode="json") + connection_config.save(db=db) + + yield connection_config + connection_config.delete(db) + + @pytest.fixture(scope="function") def snowflake_connection_config_with_keypair( db: Session, @@ -190,3 +229,32 @@ def snowflake_example_test_dataset_config( yield dataset_config dataset_config.delete(db=db) ctl_dataset.delete(db=db) + + +@pytest.fixture +def snowflake_example_test_dataset_config_with_namespace_meta( + snowflake_connection_config_without_default_dataset_or_schema: ConnectionConfig, + db: Session, + example_datasets: List[Dict], +) -> Generator: + + connection_config = snowflake_connection_config_without_default_dataset_or_schema + dataset = example_datasets[2] + dataset["fides_meta"] = { + "namespace": {"database_name": "FIDESOPS_TEST", "schema": "TEST"} + } + fides_key = dataset["fides_key"] + + ctl_dataset = CtlDataset.create_from_dataset_dict(db, dataset) + + dataset_config = DatasetConfig.create( + db=db, + data={ + "connection_config_id": connection_config.id, + "fides_key": fides_key, + "ctl_dataset_id": ctl_dataset.id, + }, + ) + yield dataset_config + dataset_config.delete(db=db) + ctl_dataset.delete(db=db) diff --git a/tests/ops/api/v1/endpoints/test_connection_template_endpoints.py b/tests/ops/api/v1/endpoints/test_connection_template_endpoints.py index bc9e1668f4f..a0f597f2f50 100644 --- a/tests/ops/api/v1/endpoints/test_connection_template_endpoints.py +++ b/tests/ops/api/v1/endpoints/test_connection_template_endpoints.py @@ -781,8 +781,8 @@ def test_get_connection_secret_schema_bigquery( "allOf": [{"$ref": "#/definitions/KeyfileCreds"}], }, "dataset": { - "title": "Default dataset", - "description": "The default BigQuery dataset that will be used if one isn't provided in the associated Fides datasets.", + "title": "Dataset", + "description": "Only provide a dataset to scope discovery monitors 
and privacy request automation to a specific BigQuery dataset. In most cases, this can be left blank.", "type": "string", }, }, @@ -796,16 +796,16 @@ def test_get_connection_secret_schema_bigquery( "type": {"title": "Type", "type": "string"}, "project_id": {"title": "Project ID", "type": "string"}, "private_key_id": { - "title": "Private Key ID", + "title": "Private key ID", "type": "string", }, "private_key": { - "title": "Private Key", + "title": "Private key", "sensitive": True, "type": "string", }, "client_email": { - "title": "Client Email", + "title": "Client email", "type": "string", "format": "email", }, @@ -813,11 +813,11 @@ def test_get_connection_secret_schema_bigquery( "auth_uri": {"title": "Auth URI", "type": "string"}, "token_uri": {"title": "Token URI", "type": "string"}, "auth_provider_x509_cert_url": { - "title": "Auth Provider X509 Cert URL", + "title": "Auth provider X509 cert URL", "type": "string", }, "client_x509_cert_url": { - "title": "Client X509 Cert URL", + "title": "Client X509 cert URL", "type": "string", }, }, @@ -1477,24 +1477,22 @@ def test_get_connection_secret_schema_snowflake( base_url.format(connection_type="snowflake"), headers=auth_header ) assert resp.json() == { - "title": "SnowflakeSchema", "description": "Schema to validate the secrets needed to connect to Snowflake", - "type": "object", "properties": { "account_identifier": { - "title": "Account Name", "description": "The unique identifier for your Snowflake account.", + "title": "Account Name", "type": "string", }, "user_login_name": { - "title": "Username", "description": "The user account used to authenticate and access the database.", + "title": "Username", "type": "string", }, "password": { - "title": "Password", "description": "The password used to authenticate and access the database. You can use a password or a private key, but not both.", "sensitive": True, + "title": "Password", "type": "string", }, "private_key": { @@ -1510,33 +1508,29 @@ def test_get_connection_secret_schema_snowflake( "type": "string", }, "warehouse_name": { - "title": "Warehouse", "description": "The name of the Snowflake warehouse where your queries will be executed.", + "title": "Warehouse", "type": "string", }, "database_name": { + "description": "Only provide a database name to scope discovery monitors and privacy request automation to a specific database. In most cases, this can be left blank.", "title": "Database", - "description": "The name of the Snowflake database you want to connect to.", "type": "string", }, "schema_name": { + "description": "Only provide a schema to scope discovery monitors and privacy request automation to a specific schema. 
In most cases, this can be left blank.", "title": "Schema", - "description": "The name of the Snowflake schema within the selected database.", "type": "string", }, "role_name": { - "title": "Role", "description": "The Snowflake role to assume for the session, if different than Username.", + "title": "Role", "type": "string", }, }, - "required": [ - "account_identifier", - "user_login_name", - "warehouse_name", - "database_name", - "schema_name", - ], + "required": ["account_identifier", "user_login_name", "warehouse_name"], + "title": "SnowflakeSchema", + "type": "object", } def test_get_connection_secret_schema_hubspot( diff --git a/tests/ops/service/connectors/test_queryconfig.py b/tests/ops/service/connectors/test_queryconfig.py index 95cb2a831fd..52572288f8c 100644 --- a/tests/ops/service/connectors/test_queryconfig.py +++ b/tests/ops/service/connectors/test_queryconfig.py @@ -783,7 +783,10 @@ class NewSQLQueryConfig(SQLQueryConfig): pass with pytest.raises(MissingNamespaceSchemaException) as exc: - NewSQLQueryConfig(payment_card_node, NewSQLNamespaceMeta(schema="public")) + NewSQLQueryConfig( + payment_card_node, + NewSQLNamespaceMeta(schema="public"), + ) assert ( "NewSQLQueryConfig must define a namespace_meta_schema when namespace_meta is provided." in str(exc) diff --git a/tests/ops/service/connectors/test_snowflake_connector.py b/tests/ops/service/connectors/test_snowflake_connector.py new file mode 100644 index 00000000000..6ce44df2bf4 --- /dev/null +++ b/tests/ops/service/connectors/test_snowflake_connector.py @@ -0,0 +1,76 @@ +from typing import Generator + +import pytest +from fideslang.models import Dataset + +from fides.api.graph.config import CollectionAddress +from fides.api.graph.graph import DatasetGraph +from fides.api.graph.traversal import Traversal +from fides.api.models.datasetconfig import DatasetConfig, convert_dataset_to_graph +from fides.api.schemas.namespace_meta.snowflake_namespace_meta import ( + SnowflakeNamespaceMeta, +) +from fides.api.service.connectors.sql_connector import SnowflakeConnector + + +@pytest.mark.integration_external +@pytest.mark.integration_snowflake +class TestSnowflakeConnector: + """ + Tests to verify that the query_config method of SnowflakeConnector + correctly retrieves namespace metadata from the dataset (if available). 
+ """ + + @pytest.fixture + def execution_node( + self, snowflake_example_test_dataset_config: DatasetConfig + ) -> Generator: + dataset_config = snowflake_example_test_dataset_config + graph_dataset = convert_dataset_to_graph( + Dataset.model_validate(dataset_config.ctl_dataset), + dataset_config.connection_config.key, + ) + dataset_graph = DatasetGraph(graph_dataset) + traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"}) + + yield traversal.traversal_node_dict[ + CollectionAddress("snowflake_example_test_dataset", "customer") + ].to_mock_execution_node() + + @pytest.fixture + def execution_node_with_namespace_meta( + self, snowflake_example_test_dataset_config_with_namespace_meta: DatasetConfig + ) -> Generator: + dataset_config = snowflake_example_test_dataset_config_with_namespace_meta + graph_dataset = convert_dataset_to_graph( + Dataset.model_validate(dataset_config.ctl_dataset), + dataset_config.connection_config.key, + ) + dataset_graph = DatasetGraph(graph_dataset) + traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"}) + + yield traversal.traversal_node_dict[ + CollectionAddress("snowflake_example_test_dataset", "customer") + ].to_mock_execution_node() + + def test_query_config( + self, + snowflake_example_test_dataset_config: DatasetConfig, + execution_node, + ): + dataset_config = snowflake_example_test_dataset_config + connector = SnowflakeConnector(dataset_config.connection_config) + query_config = connector.query_config(execution_node) + assert query_config.namespace_meta is None + + def test_query_config_with_namespace_meta( + self, + snowflake_example_test_dataset_config_with_namespace_meta: DatasetConfig, + execution_node_with_namespace_meta, + ): + dataset_config = snowflake_example_test_dataset_config_with_namespace_meta + connector = SnowflakeConnector(dataset_config.connection_config) + query_config = connector.query_config(execution_node_with_namespace_meta) + assert query_config.namespace_meta == SnowflakeNamespaceMeta( + **dataset_config.ctl_dataset.fides_meta["namespace"] + ) diff --git a/tests/ops/service/connectors/test_snowflake_query_config.py b/tests/ops/service/connectors/test_snowflake_query_config.py new file mode 100644 index 00000000000..656ad75c3cf --- /dev/null +++ b/tests/ops/service/connectors/test_snowflake_query_config.py @@ -0,0 +1,193 @@ +from typing import Generator + +import pytest +from fideslang.models import Dataset +from pydantic import ValidationError + +from fides.api.graph.config import CollectionAddress +from fides.api.graph.execution import ExecutionNode +from fides.api.graph.graph import DatasetGraph +from fides.api.graph.traversal import Traversal +from fides.api.models.datasetconfig import DatasetConfig, convert_dataset_to_graph +from fides.api.schemas.namespace_meta.snowflake_namespace_meta import ( + SnowflakeNamespaceMeta, +) +from fides.api.service.connectors import SnowflakeConnector +from fides.api.service.connectors.query_config import SnowflakeQueryConfig + + +@pytest.mark.integration_external +@pytest.mark.integration_snowflake +class TestSnowflakeQueryConfig: + """ + Verify that the generate_query method of SnowflakeQueryConfig correctly adjusts + the table name based on the available namespace info in the dataset's fides_meta. 
+ """ + + @pytest.fixture(scope="function") + def snowflake_client(self, snowflake_connection_config): + connector = SnowflakeConnector(snowflake_connection_config) + return connector.client() + + @pytest.fixture(scope="function") + def dataset_graph(self, example_datasets, snowflake_connection_config): + dataset = Dataset(**example_datasets[2]) + graph = convert_dataset_to_graph(dataset, snowflake_connection_config.key) + return DatasetGraph(*[graph]) + + @pytest.fixture(scope="function") + def employee_node(self, dataset_graph): + identity = {"email": "customer-1@example.com"} + snowflake_traversal = Traversal(dataset_graph, identity) + return snowflake_traversal.traversal_node_dict[ + CollectionAddress("snowflake_example_test_dataset", "employee") + ].to_mock_execution_node() + + @pytest.fixture(scope="function") + def address_node(self, dataset_graph): + identity = {"email": "customer-1@example.com"} + snowflake_traversal = Traversal(dataset_graph, identity) + return snowflake_traversal.traversal_node_dict[ + CollectionAddress("snowflake_example_test_dataset", "address") + ].to_mock_execution_node() + + @pytest.fixture + def execution_node( + self, snowflake_example_test_dataset_config_with_namespace_meta: DatasetConfig + ) -> Generator: + dataset_config = snowflake_example_test_dataset_config_with_namespace_meta + graph_dataset = convert_dataset_to_graph( + Dataset.model_validate(dataset_config.ctl_dataset), + dataset_config.connection_config.key, + ) + dataset_graph = DatasetGraph(graph_dataset) + traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"}) + + yield traversal.traversal_node_dict[ + CollectionAddress("snowflake_example_test_dataset", "customer") + ].to_mock_execution_node() + + @pytest.mark.parametrize( + "namespace_meta, expected_query", + [ + ( + SnowflakeNamespaceMeta(database_name="FIDESOPS_TEST", schema="TEST"), + 'SELECT "address_id", "created", "email", "id", "name", "variant_eg" FROM "FIDESOPS_TEST"."TEST"."customer" WHERE ("email" = (:email))', + ), + # Namespace meta will be a dict / JSON when retrieved from the DB + ( + {"database_name": "FIDESOPS_TEST", "schema": "TEST"}, + 'SELECT "address_id", "created", "email", "id", "name", "variant_eg" FROM "FIDESOPS_TEST"."TEST"."customer" WHERE ("email" = (:email))', + ), + ( + { + "database_name": "FIDESOPS_TEST", + "schema": "TEST", + "connection_type": "snowflake", + }, + 'SELECT "address_id", "created", "email", "id", "name", "variant_eg" FROM "FIDESOPS_TEST"."TEST"."customer" WHERE ("email" = (:email))', + ), + ( + None, + 'SELECT "address_id", "created", "email", "id", "name", "variant_eg" FROM "customer" WHERE ("email" = (:email))', + ), + ], + ) + def test_generate_query_with_namespace_meta( + self, execution_node: ExecutionNode, namespace_meta, expected_query + ): + query_config = SnowflakeQueryConfig(execution_node, namespace_meta) + assert ( + query_config.generate_query( + input_data={"email": ["customer-1@example.com"]} + ).text + == expected_query + ) + + def test_generate_query_with_invalid_namespace_meta( + self, execution_node: ExecutionNode + ): + with pytest.raises(ValidationError) as exc: + SnowflakeQueryConfig( + execution_node, SnowflakeNamespaceMeta(dataset_id="first_dataset") + ) + assert "Field required" in str(exc) + + def test_generate_update_stmt( + self, + db, + address_node, + erasure_policy, + privacy_request, + dataset_graph, + ): + """ + Test node uses typical policy-level masking strategies in an update statement + """ + + assert ( + dataset_graph.nodes[ + 
CollectionAddress("snowflake_example_test_dataset", "address") + ].collection.masking_strategy_override + is None + ) + + erasure_policy.rules[0].targets[0].data_category = "user" + erasure_policy.rules[0].targets[0].save(db) + update_stmt = SnowflakeQueryConfig(address_node).generate_update_stmt( + { + "id": "1", + "house": "222", + "state": "TX", + "city": "Houston", + "street": "Water", + "zip": "11111", + }, + erasure_policy, + privacy_request, + ) + assert ( + str(update_stmt) + == 'UPDATE "address" SET "city" = :city, "house" = :house, "state" = :state, "street" = :street, "zip" = :zip WHERE "id" = :id' + ) + + def test_generate_namespaced_update_stmt( + self, + db, + address_node, + erasure_policy, + privacy_request, + dataset_graph, + ): + """ + Test node uses typical policy-level masking strategies in an update statement + """ + + assert ( + dataset_graph.nodes[ + CollectionAddress("snowflake_example_test_dataset", "address") + ].collection.masking_strategy_override + is None + ) + + erasure_policy.rules[0].targets[0].data_category = "user" + erasure_policy.rules[0].targets[0].save(db) + update_stmt = SnowflakeQueryConfig( + address_node, + SnowflakeNamespaceMeta(database_name="FIDESOPS_TEST", schema="TEST"), + ).generate_update_stmt( + { + "id": "1", + "house": "222", + "state": "TX", + "city": "Houston", + "street": "Water", + "zip": "11111", + }, + erasure_policy, + privacy_request, + ) + assert ( + str(update_stmt) + == 'UPDATE "FIDESOPS_TEST"."TEST"."address" SET "city" = :city, "house" = :house, "state" = :state, "street" = :street, "zip" = :zip WHERE "id" = :id' + ) diff --git a/tests/ops/service/privacy_request/test_request_runner_service.py b/tests/ops/service/privacy_request/test_request_runner_service.py index 053db3b85ef..66f332d4170 100644 --- a/tests/ops/service/privacy_request/test_request_runner_service.py +++ b/tests/ops/service/privacy_request/test_request_runner_service.py @@ -48,10 +48,7 @@ from fides.api.schemas.saas.shared_schemas import HTTPMethod, SaaSRequestParams from fides.api.service.connectors.dynamodb_connector import DynamoDBConnector from fides.api.service.connectors.saas_connector import SaaSConnector -from fides.api.service.connectors.sql_connector import ( - RedshiftConnector, - SnowflakeConnector, -) +from fides.api.service.connectors.sql_connector import RedshiftConnector from fides.api.service.masking.strategy.masking_strategy import MaskingStrategy from fides.api.service.masking.strategy.masking_strategy_hmac import HmacMaskingStrategy from fides.api.service.privacy_request.request_runner_service import ( @@ -1785,126 +1782,6 @@ def test_create_and_process_erasure_request_read_access( assert customer_found -@pytest.fixture(scope="function") -def snowflake_resources( - snowflake_example_test_dataset_config, -): - snowflake_connection_config = ( - snowflake_example_test_dataset_config.connection_config - ) - snowflake_client = SnowflakeConnector(snowflake_connection_config).client() - uuid = str(uuid4()) - customer_email = f"customer-{uuid}@example.com" - formatted_customer_email = f"'{customer_email}'" - customer_name = f"{uuid}" - formatted_customer_name = f"'{customer_name}'" - - stmt = 'select max("id") from "customer";' - res = snowflake_client.execute(stmt).all() - customer_id = res[0][0] + 1 - - stmt = f""" - insert into "customer" ("id", "email", "name", "variant_eg") - select {customer_id}, {formatted_customer_email}, {formatted_customer_name}, to_variant({formatted_customer_name}); - """ - res = snowflake_client.execute(stmt).all() - 
assert res[0][0] == 1 - yield { - "email": customer_email, - "formatted_email": formatted_customer_email, - "name": customer_name, - "id": customer_id, - "client": snowflake_client, - } - # Remove test data and close Snowflake connection in teardown - stmt = f'delete from "customer" where "email" = {formatted_customer_email};' - res = snowflake_client.execute(stmt).all() - assert res[0][0] == 1 - - -@pytest.mark.integration_external -@pytest.mark.integration_snowflake -@pytest.mark.parametrize( - "dsr_version", - ["use_dsr_3_0", "use_dsr_2_0"], -) -def test_create_and_process_access_request_snowflake( - snowflake_resources, - db, - cache, - policy, - dsr_version, - request, - run_privacy_request_task, -): - request.getfixturevalue(dsr_version) # REQUIRED to test both DSR 3.0 and 2.0 - - customer_email = snowflake_resources["email"] - customer_name = snowflake_resources["name"] - data = { - "requested_at": "2021-08-30T16:09:37.359Z", - "policy_key": policy.key, - "identity": {"email": customer_email}, - } - pr = get_privacy_request_results( - db, - policy, - run_privacy_request_task, - data, - task_timeout=PRIVACY_REQUEST_TASK_TIMEOUT_EXTERNAL, - ) - results = pr.get_raw_access_results() - customer_table_key = f"snowflake_example_test_dataset:customer" - assert len(results[customer_table_key]) == 1 - assert results[customer_table_key][0]["email"] == customer_email - assert results[customer_table_key][0]["name"] == customer_name - - pr.delete(db=db) - - -@pytest.mark.integration_external -@pytest.mark.integration_snowflake -@pytest.mark.parametrize( - "dsr_version", - ["use_dsr_3_0", "use_dsr_2_0"], -) -def test_create_and_process_erasure_request_snowflake( - snowflake_example_test_dataset_config, - snowflake_resources, - integration_config: Dict[str, str], - db, - cache, - dsr_version, - request, - erasure_policy, - run_privacy_request_task, -): - request.getfixturevalue(dsr_version) # REQUIRED to test both DSR 3.0 and 2.0 - - customer_email = snowflake_resources["email"] - snowflake_client = snowflake_resources["client"] - formatted_customer_email = snowflake_resources["formatted_email"] - data = { - "requested_at": "2021-08-30T16:09:37.359Z", - "policy_key": erasure_policy.key, - "identity": {"email": customer_email}, - } - pr = get_privacy_request_results( - db, - erasure_policy, - run_privacy_request_task, - data, - task_timeout=PRIVACY_REQUEST_TASK_TIMEOUT_EXTERNAL, - ) - pr.delete(db=db) - - stmt = f'select "name", "variant_eg" from "customer" where "email" = {formatted_customer_email};' - res = snowflake_client.execute(stmt).all() - for row in res: - assert row.name is None - assert row.variant_eg is None - - @pytest.fixture(scope="function") def redshift_resources( redshift_example_test_dataset_config, diff --git a/tests/ops/service/privacy_request/test_snowflake_privacy_requests.py b/tests/ops/service/privacy_request/test_snowflake_privacy_requests.py new file mode 100644 index 00000000000..ba8b514eb31 --- /dev/null +++ b/tests/ops/service/privacy_request/test_snowflake_privacy_requests.py @@ -0,0 +1,243 @@ +from uuid import uuid4 + +import pytest + +from fides.api.service.connectors.sql_connector import SnowflakeConnector +from tests.ops.service.privacy_request.test_request_runner_service import ( + PRIVACY_REQUEST_TASK_TIMEOUT_EXTERNAL, + get_privacy_request_results, +) + + +@pytest.fixture(scope="function") +def snowflake_resources( + snowflake_example_test_dataset_config, +): + snowflake_connection_config = ( + snowflake_example_test_dataset_config.connection_config + ) + 
snowflake_client = SnowflakeConnector(snowflake_connection_config).client() + uuid = str(uuid4()) + customer_email = f"customer-{uuid}@example.com" + formatted_customer_email = f"'{customer_email}'" + customer_name = f"{uuid}" + formatted_customer_name = f"'{customer_name}'" + + stmt = 'select max("id") from "customer";' + res = snowflake_client.execute(stmt).all() + customer_id = res[0][0] + 1 + + stmt = f""" + insert into "customer" ("id", "email", "name", "variant_eg") + select {customer_id}, {formatted_customer_email}, {formatted_customer_name}, to_variant({formatted_customer_name}); + """ + res = snowflake_client.execute(stmt).all() + assert res[0][0] == 1 + yield { + "email": customer_email, + "formatted_email": formatted_customer_email, + "name": customer_name, + "id": customer_id, + "client": snowflake_client, + } + # Remove test data and close Snowflake connection in teardown + stmt = f'delete from "customer" where "email" = {formatted_customer_email};' + res = snowflake_client.execute(stmt).all() + assert res[0][0] == 1 + + +@pytest.fixture(scope="function") +def snowflake_resources_with_namespace_meta( + snowflake_example_test_dataset_config_with_namespace_meta, +): + snowflake_connection_config = ( + snowflake_example_test_dataset_config_with_namespace_meta.connection_config + ) + snowflake_client = SnowflakeConnector(snowflake_connection_config).client() + uuid = str(uuid4()) + customer_email = f"customer-{uuid}@example.com" + formatted_customer_email = f"'{customer_email}'" + customer_name = f"{uuid}" + formatted_customer_name = f"'{customer_name}'" + + stmt = 'select max("id") from "FIDESOPS_TEST"."TEST"."customer";' + res = snowflake_client.execute(stmt).all() + customer_id = res[0][0] + 1 + + stmt = f""" + insert into "FIDESOPS_TEST"."TEST"."customer" ("id", "email", "name", "variant_eg") + select {customer_id}, {formatted_customer_email}, {formatted_customer_name}, to_variant({formatted_customer_name}); + """ + res = snowflake_client.execute(stmt).all() + assert res[0][0] == 1 + yield { + "email": customer_email, + "formatted_email": formatted_customer_email, + "name": customer_name, + "id": customer_id, + "client": snowflake_client, + } + # Remove test data and close Snowflake connection in teardown + stmt = f'delete from "FIDESOPS_TEST"."TEST"."customer" where "email" = {formatted_customer_email};' + res = snowflake_client.execute(stmt).all() + assert res[0][0] == 1 + + +@pytest.mark.integration_external +@pytest.mark.integration_snowflake +@pytest.mark.parametrize( + "dsr_version", + ["use_dsr_3_0", "use_dsr_2_0"], +) +def test_create_and_process_access_request_snowflake( + snowflake_resources, + db, + policy, + dsr_version, + request, + run_privacy_request_task, +): + request.getfixturevalue(dsr_version) # REQUIRED to test both DSR 3.0 and 2.0 + + customer_email = snowflake_resources["email"] + customer_name = snowflake_resources["name"] + data = { + "requested_at": "2021-08-30T16:09:37.359Z", + "policy_key": policy.key, + "identity": {"email": customer_email}, + } + pr = get_privacy_request_results( + db, + policy, + run_privacy_request_task, + data, + task_timeout=PRIVACY_REQUEST_TASK_TIMEOUT_EXTERNAL, + ) + results = pr.get_raw_access_results() + customer_table_key = "snowflake_example_test_dataset:customer" + assert len(results[customer_table_key]) == 1 + assert results[customer_table_key][0]["email"] == customer_email + assert results[customer_table_key][0]["name"] == customer_name + + pr.delete(db=db) + + +@pytest.mark.integration_external 
+@pytest.mark.integration_snowflake +@pytest.mark.parametrize( + "dsr_version", + ["use_dsr_3_0", "use_dsr_2_0"], +) +def test_create_and_process_erasure_request_snowflake( + snowflake_resources, + db, + dsr_version, + request, + erasure_policy, + run_privacy_request_task, +): + request.getfixturevalue(dsr_version) # REQUIRED to test both DSR 3.0 and 2.0 + + customer_email = snowflake_resources["email"] + snowflake_client = snowflake_resources["client"] + formatted_customer_email = snowflake_resources["formatted_email"] + data = { + "requested_at": "2021-08-30T16:09:37.359Z", + "policy_key": erasure_policy.key, + "identity": {"email": customer_email}, + } + pr = get_privacy_request_results( + db, + erasure_policy, + run_privacy_request_task, + data, + task_timeout=PRIVACY_REQUEST_TASK_TIMEOUT_EXTERNAL, + ) + pr.delete(db=db) + + stmt = f'select "name", "variant_eg" from "customer" where "email" = {formatted_customer_email};' + res = snowflake_client.execute(stmt).all() + for row in res: + assert row.name is None + assert row.variant_eg is None + + +@pytest.mark.integration_external +@pytest.mark.integration_snowflake +@pytest.mark.parametrize( + "dsr_version", + ["use_dsr_3_0", "use_dsr_2_0"], +) +def test_create_and_process_access_request_snowflake_with_namespace_meta( + snowflake_resources_with_namespace_meta, + db, + policy, + dsr_version, + request, + run_privacy_request_task, +): + request.getfixturevalue(dsr_version) # REQUIRED to test both DSR 3.0 and 2.0 + + customer_email = snowflake_resources_with_namespace_meta["email"] + customer_name = snowflake_resources_with_namespace_meta["name"] + data = { + "requested_at": "2021-08-30T16:09:37.359Z", + "policy_key": policy.key, + "identity": {"email": customer_email}, + } + pr = get_privacy_request_results( + db, + policy, + run_privacy_request_task, + data, + task_timeout=PRIVACY_REQUEST_TASK_TIMEOUT_EXTERNAL, + ) + results = pr.get_raw_access_results() + customer_table_key = "snowflake_example_test_dataset:customer" + assert len(results[customer_table_key]) == 1 + assert results[customer_table_key][0]["email"] == customer_email + assert results[customer_table_key][0]["name"] == customer_name + + pr.delete(db=db) + + +@pytest.mark.integration_external +@pytest.mark.integration_snowflake +@pytest.mark.parametrize( + "dsr_version", + ["use_dsr_3_0", "use_dsr_2_0"], +) +def test_create_and_process_erasure_request_snowflake_with_namespace_meta( + snowflake_resources_with_namespace_meta, + db, + dsr_version, + request, + erasure_policy, + run_privacy_request_task, +): + request.getfixturevalue(dsr_version) # REQUIRED to test both DSR 3.0 and 2.0 + + customer_email = snowflake_resources_with_namespace_meta["email"] + snowflake_client = snowflake_resources_with_namespace_meta["client"] + formatted_customer_email = snowflake_resources_with_namespace_meta[ + "formatted_email" + ] + data = { + "requested_at": "2021-08-30T16:09:37.359Z", + "policy_key": erasure_policy.key, + "identity": {"email": customer_email}, + } + pr = get_privacy_request_results( + db, + erasure_policy, + run_privacy_request_task, + data, + task_timeout=PRIVACY_REQUEST_TASK_TIMEOUT_EXTERNAL, + ) + pr.delete(db=db) + + stmt = f'select "name", "variant_eg" from "FIDESOPS_TEST"."TEST"."customer" where "email" = {formatted_customer_email};' + res = snowflake_client.execute(stmt).all() + for row in res: + assert row.name is None + assert row.variant_eg is None
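
The sketch below is illustrative only and is not part of the diff above: it shows the configuration shape this change enables, where the Snowflake connection secrets omit database_name/schema_name and each dataset declares its own namespace via fides_meta. The credential values are placeholders; the namespace values mirror the snowflake_example_test_dataset_config_with_namespace_meta fixture added in this patch.

from fides.api.schemas.connection_configuration.connection_secrets_snowflake import (
    SnowflakeSchema,
)

# Connection secrets: with this patch, only the account identifier, user,
# credentials, and warehouse are required. All values below are placeholders.
secrets = SnowflakeSchema(
    account_identifier="example_account",
    user_login_name="example_user",
    password="example_password",
    warehouse_name="example_warehouse",
)

# The dataset, not the connection, carries the namespace. This mirrors the
# fixture added in tests/fixtures/snowflake_fixtures.py above.
dataset = {
    "fides_key": "snowflake_example_test_dataset",
    "fides_meta": {
        "namespace": {"database_name": "FIDESOPS_TEST", "schema": "TEST"},
    },
    # collections omitted for brevity
}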
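
A standalone sketch (no Fides imports; qualified_table_name is an assumed helper name used only for illustration) of the quoting rule that SnowflakeQueryConfig._generate_table_name applies in this patch: the collection name is always double-quoted, the schema is prepended when namespace metadata is present, and the database is prepended when one is provided. The expected strings match the parametrized query tests above.

from typing import Optional


def qualified_table_name(
    table: str, schema: Optional[str] = None, database: Optional[str] = None
) -> str:
    name = f'"{table}"'  # always quote the base table name
    if schema is None:
        return name  # no namespace meta: leave the table unqualified
    name = f'"{schema}".{name}'  # prepend the schema
    if database:
        name = f'"{database}".{name}'  # prepend the database when provided
    return name


assert qualified_table_name("customer") == '"customer"'
assert (
    qualified_table_name("customer", schema="TEST", database="FIDESOPS_TEST")
    == '"FIDESOPS_TEST"."TEST"."customer"'
)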