
Commit

Fix code to work with Fugue 0.8.7 (#245)
* Fix code to work with Fugue 0.8.7

* update

* update

* update

* Update pyproject.toml

Co-authored-by: Faisal <[email protected]>

---------

Co-authored-by: Faisal <[email protected]>
Han Wang and Faisal authored Nov 14, 2023
1 parent b9fcbf9 · commit d2cbb41
Showing 4 changed files with 33 additions and 10 deletions.
datacompy/__init__.py (2 changes: 1 addition & 1 deletion)
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.10.4"
+__version__ = "0.10.5"
 
 from datacompy.core import *
 from datacompy.fugue import (
datacompy/fugue.py (25 changes: 23 additions & 2 deletions)
@@ -541,6 +541,7 @@ def _distributed_compare(
 
     def _serialize(dfs: Iterable[pd.DataFrame], left: bool) -> Iterable[Dict[str, Any]]:
         for df in dfs:
+            df = df.convert_dtypes()
             cols = {}
             for name in df.columns:
                 col = df[name]
@@ -577,8 +578,28 @@ def _deserialize(
         arr = [pickle.loads(r["data"]) for r in df if r["left"] == left]
         if len(arr) > 0:
             return pd.concat(arr).sort_values(schema.names).reset_index(drop=True)
-        return pd.DataFrame(
-            {k: pd.Series(dtype=v) for k, v in schema.pandas_dtype.items()}
+        # The following is how to construct an empty pandas dataframe with
+        # the correct schema, it avoids pandas schema inference which is wrong.
+        # This is not needed when upgrading to Fugue >= 0.8.7
+        sample_row: List[Any] = []
+        for field in schema.fields:
+            if pa.types.is_string(field.type):
+                sample_row.append("x")
+            elif pa.types.is_integer(field.type):
+                sample_row.append(1)
+            elif pa.types.is_floating(field.type):
+                sample_row.append(1.1)
+            elif pa.types.is_boolean(field.type):
+                sample_row.append(True)
+            elif pa.types.is_timestamp(field.type):
+                sample_row.append(pd.NaT)
+            else:
+                sample_row.append(None)
+        return (
+            pd.DataFrame([sample_row], columns=schema.names)
+            .astype(schema.pandas_dtype)
+            .convert_dtypes()
+            .head(0)
         )
 
     def _comp(df: List[Dict[str, Any]]) -> List[List[Any]]:
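The comment inside `_deserialize` explains the workaround: seed a one-row frame with plausible values per Arrow type, cast it to the schema's pandas dtypes, run `convert_dtypes()`, then drop the row with `head(0)`. Here is a standalone sketch of why this differs from the old empty-`Series` construction; the column names and dtypes below are made up for illustration and are not from datacompy:

```python
import pandas as pd

# Hypothetical schema: column names plus raw pandas dtypes.
columns = ["aa", "bb", "cc"]
pandas_dtype = {"aa": "int64", "bb": "float64", "cc": "object"}

# Old construction: empty Series keep the raw NumPy/object dtypes,
# which do not match what convert_dtypes() produces on real data.
naive = pd.DataFrame({k: pd.Series(dtype=v) for k, v in pandas_dtype.items()})
print(naive.dtypes.to_dict())  # int64, float64, object

# New construction: one sample row, cast to the schema dtypes, convert to
# nullable extension dtypes (Int64, Float64, string), then drop the row
# with head(0) while the dtypes stay in place.
sample_row = [1, 1.1, "x"]
empty = (
    pd.DataFrame([sample_row], columns=columns)
    .astype(pandas_dtype)
    .convert_dtypes()
    .head(0)
)
print(empty.dtypes.to_dict())  # Int64, Float64, string
```

This keeps the empty-partition branch of `_deserialize` schema-compatible with the non-empty branch, whose inputs are already normalized by the new `convert_dtypes()` call in `_serialize`.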
pyproject.toml (2 changes: 1 addition & 1 deletion)
@@ -15,7 +15,7 @@ dependencies = [
     "pandas<=2.0.2,>=0.25.0",
     "numpy<=1.26.0,>=1.22.0",
     "ordered-set<=4.1.0,>=4.0.2",
-    "fugue<=0.9.0,>=0.8.6",
+    "fugue<=0.8.7,>=0.8.7",
 ]
 requires-python = ">=3.8.0"
 classifiers = [
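The new specifier `fugue<=0.8.7,>=0.8.7` is effectively an exact pin to Fugue 0.8.7. A quick check with the `packaging` library shows how it resolves (an illustration, not part of the repository):

```python
from packaging.specifiers import SpecifierSet

# "<=0.8.7,>=0.8.7" only admits version 0.8.7 itself.
spec = SpecifierSet("<=0.8.7,>=0.8.7")
print("0.8.6" in spec, "0.8.7" in spec, "0.9.0" in spec)  # False True False
```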
tests/test_fugue.py (14 changes: 8 additions & 6 deletions)
@@ -80,37 +80,39 @@ def upper_col_df(shuffle_df):
 
 @pytest.fixture
 def simple_diff_df1():
-    return pd.DataFrame(dict(aa=[0, 1, 0], bb=[2.1, 3.1, 4.1]))
+    return pd.DataFrame(dict(aa=[0, 1, 0], bb=[2.1, 3.1, 4.1])).convert_dtypes()
 
 
 @pytest.fixture
 def simple_diff_df2():
-    return pd.DataFrame(dict(aa=[1, 0, 1], bb=[3.1, 4.1, 5.1], cc=["a", "b", "c"]))
+    return pd.DataFrame(
+        dict(aa=[1, 0, 1], bb=[3.1, 4.1, 5.1], cc=["a", "b", "c"])
+    ).convert_dtypes()
 
 
 @pytest.fixture
 def no_intersection_diff_df1():
     np.random.seed(0)
-    return pd.DataFrame(dict(x=["a"], y=[0.1]))
+    return pd.DataFrame(dict(x=["a"], y=[0.1])).convert_dtypes()
 
 
 @pytest.fixture
 def no_intersection_diff_df2():
-    return pd.DataFrame(dict(x=["b"], y=[1.1]))
+    return pd.DataFrame(dict(x=["b"], y=[1.1])).convert_dtypes()
 
 
 @pytest.fixture
 def large_diff_df1():
     np.random.seed(0)
     data = np.random.randint(0, 7, size=10000)
-    return pd.DataFrame({"x": data, "y": np.array([9] * 10000)})
+    return pd.DataFrame({"x": data, "y": np.array([9] * 10000)}).convert_dtypes()
 
 
 @pytest.fixture
 def large_diff_df2():
     np.random.seed(0)
     data = np.random.randint(6, 11, size=10000)
-    return pd.DataFrame({"x": data, "y": np.array([9] * 10000)})
+    return pd.DataFrame({"x": data, "y": np.array([9] * 10000)}).convert_dtypes()
 
 
 def test_is_match_native(
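The fixtures now end in `.convert_dtypes()`, so the expected frames carry the same nullable extension dtypes that the serialization path in `fugue.py` now produces. A standalone illustration of that dtype shift (not taken from the test suite):

```python
import pandas as pd

# Plain constructors give NumPy/object dtypes; convert_dtypes() upgrades
# them to pandas' nullable extension dtypes.
df = pd.DataFrame(dict(aa=[0, 1, 0], bb=[2.1, 3.1, 4.1], cc=["a", "b", "c"]))
print(df.dtypes.to_dict())                   # int64, float64, object
print(df.convert_dtypes().dtypes.to_dict())  # Int64, Float64, string
```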
