From a6c1ff4be897652aea6efc4f902105162183a85f Mon Sep 17 00:00:00 2001 From: Nate Parsons Date: Fri, 10 May 2024 13:10:36 -0500 Subject: [PATCH] add tests --- docs/source/release_notes.rst | 2 +- woodwork/tests/accessor/test_serialization.py | 53 ++++++++++++++++++- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index b34cda38c..cafc5f5fe 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -6,9 +6,9 @@ Release Notes Future Release ============== * Enhancements + * Add support for Python 3.12 :pr:`1855` * Fixes * Changes - * Add support for Python 3.12 :pr:`1855` * Drop support for using Woodwork with Dask or Pyspark dataframes :pr:`1857` * Use ``filter`` arg in call to ``tarfile.extractall`` to safely deserialize DataFrames :pr:`1862` * Documentation Changes diff --git a/woodwork/tests/accessor/test_serialization.py b/woodwork/tests/accessor/test_serialization.py index c169965e6..bd04fa777 100644 --- a/woodwork/tests/accessor/test_serialization.py +++ b/woodwork/tests/accessor/test_serialization.py @@ -2,7 +2,7 @@ import os import shutil import warnings -from unittest.mock import patch +from unittest.mock import MagicMock, patch import boto3 import pandas as pd @@ -662,6 +662,35 @@ def test_to_csv_S3(sample_df, s3_client, s3_bucket, profile_name): assert sample_df.ww.schema == deserialized_df.ww.schema
+@patch("woodwork.deserializers.deserializer_base.getfullargspec") +def test_to_csv_S3_errors_if_python_version_unsafe( + mock_inspect, + sample_df, + s3_client, + s3_bucket, +): + mock_response = MagicMock() + mock_response.kwonlyargs = [] + mock_inspect.return_value = mock_response + sample_df.ww.init( + name="test_data", + index="id", + semantic_tags={"id": "tag1"}, + logical_types={"age": Ordinal(order=[25, 33, 57])}, + ) + sample_df.ww.to_disk( + TEST_S3_URL, + format="csv", + encoding="utf-8", + engine="python", + profile_name=None, + ) + make_public(s3_client, s3_bucket) + + with pytest.raises(RuntimeError, match="Please upgrade your Python version"): + read_woodwork_table(TEST_S3_URL, profile_name=None) + + @pytest.mark.parametrize("profile_name", [None, False]) def test_serialize_s3_pickle(sample_df, s3_client, s3_bucket, profile_name): sample_df.ww.init() @@ -688,6 +717,28 @@ def test_serialize_s3_parquet(sample_df, s3_client, s3_bucket, profile_name): assert sample_df.ww.schema == deserialized_df.ww.schema +@patch("woodwork.deserializers.parquet_deserializer.getfullargspec") +def test_serialize_s3_parquet_errors_if_python_version_unsafe( + mock_inspect, + sample_df, + s3_client, + s3_bucket, +): + mock_response = MagicMock() + mock_response.kwonlyargs = [] + mock_inspect.return_value = mock_response + sample_df.ww.init() + sample_df.ww.to_disk(TEST_S3_URL, format="parquet", profile_name=None) + make_public(s3_client, s3_bucket) + + with pytest.raises(RuntimeError, match="Please upgrade your Python version"): + read_woodwork_table( + TEST_S3_URL, + filename="data.parquet", + profile_name=None, + ) + + def create_test_credentials(test_path): with open(test_path, "w+") as f: f.write("[test]\n")