From 973111f29af0f7915ff3dd6f294e7df3de01209a Mon Sep 17 00:00:00 2001 From: janezd Date: Sat, 28 Sep 2024 14:50:22 +0200 Subject: [PATCH] OrangeDataFrame: Fix patched constructor This is a proper solution that would work before and would continue to work in pandas>=3 -- if it was not for a bug in pandas (https://github.com/pandas-dev/pandas/issues/59913). Hence, this commit also (dynamically) patches the bug in pandas. --- Orange/data/pandas_compat.py | 38 +++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/Orange/data/pandas_compat.py b/Orange/data/pandas_compat.py index 1780fa6fc77..991154aec15 100644 --- a/Orange/data/pandas_compat.py +++ b/Orange/data/pandas_compat.py @@ -1,11 +1,12 @@ """Pandas DataFrame↔Table conversion helpers""" -from unittest.mock import patch +from functools import partial import numpy as np from scipy import sparse as sp from scipy.sparse import csr_matrix import pandas as pd from pandas.core.arrays import SparseArray +import pandas.core.arrays.sparse.accessor from pandas.api.types import ( is_object_dtype, is_datetime64_any_dtype, @@ -22,6 +23,19 @@ __all__ = ['table_from_frame', 'table_to_frame'] +# Patch a bug in pandas SparseFrameAccessor.to_dense +# As of pandas=3.0.0.dev0+1524.g23c497bb2f, to_dense ignores _constructor +# and always returns DataFrame. 
+# Remove this if https://github.com/pandas-dev/pandas/issues/59913 is fixed +def to_dense(self): + # pylint: disable=protected-access + data = {k: v.array.to_dense() for k, v in self._parent.items()} + constr = self._parent._constructor + return constr(data, index=self._parent.index, columns=self._parent.columns) + +pandas.core.arrays.sparse.accessor.SparseFrameAccessor.to_dense = to_dense + + class OrangeDataFrame(pd.DataFrame): _metadata = ["orange_variables", "orange_weights", "orange_attributes", "orange_role"] @@ -74,8 +88,6 @@ def __init__(self, *args, **kwargs): data = dict(enumerate(sparrays)) super().__init__(data, index=index, **kwargs) self.columns = columns - # a hack to keep Orange df _metadata in sparse->dense conversion - self.sparse.to_dense = self.__patch_constructor(self.sparse.to_dense) else: copy = kwargs.pop("copy", False) super().__init__( @@ -88,21 +100,15 @@ def __init__(self, *args, **kwargs): if table.W.size > 0 else {}) self.orange_attributes = table.attributes - def __patch_constructor(self, method): - def new_method(*args, **kwargs): - with patch( - 'pandas.DataFrame', - OrangeDataFrame - ): - df = method(*args, **kwargs) - df.__finalize__(self) - return df - - return new_method - @property def _constructor(self): - return OrangeDataFrame + return partial(self.from_existing, self) + + @staticmethod + def from_existing(existing, *args, **kwargs): + self = type(existing)(*args, **kwargs) + self.__finalize__(existing) + return self def to_orange_table(self): return table_from_frame(self)