ENH: Fix some warnings caused by deprecation #804

Merged
merged 17 commits on Sep 18, 2024
2 changes: 1 addition & 1 deletion .github/workflows/asv.yaml
@@ -54,7 +54,7 @@ jobs:
if: ${{ steps.build.outcome == 'success' }}

- name: Publish benchmarks artifact
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: Benchmarks log
path: benchmarks/asv_bench/results
5 changes: 3 additions & 2 deletions .github/workflows/python.yaml
@@ -92,9 +92,10 @@ jobs:
- { os: ubuntu-20.04, module: hadoop, python-version: 3.9 }
- { os: ubuntu-latest, module: vineyard, python-version: 3.9 }
- { os: ubuntu-latest, module: external-storage, python-version: 3.9 }
- { os: ubuntu-latest, module: compatibility, python-version: 3.9 }
# always test compatibility with the latest version
# - { os: ubuntu-latest, module: compatibility, python-version: 3.9 }
- { os: ubuntu-latest, module: doc-build, python-version: 3.9 }
- { os: [self-hosted, gpu], module: gpu, python-version: 3.11}
- { os: self-hosted, module: gpu, python-version: 3.11}
- { os: ubuntu-latest, module: jax, python-version: 3.9 }
# a self-hosted runner which needs computing resources, activate when necessary
# - { os: juicefs-ci, module: kubernetes-juicefs, python-version: 3.9 }
2 changes: 1 addition & 1 deletion python/xorbits/_mars/_utils.pyx
@@ -207,7 +207,7 @@ cdef list tokenize_pandas_dataframe(ob):


cdef list tokenize_pandas_categorical(ob):
l = ob.to_list()
l = ob.tolist()
l.append(ob.shape)
return iterative_tokenize(l)

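A note on the change above: Categorical.to_list() is deprecated in recent pandas (2.1+) in favor of tolist(). A minimal plain-Python sketch of the same input handling (not the Cython helper itself):

import pandas as pd

cat = pd.Categorical(["a", "b", "a"])

# Categorical.to_list() is deprecated; tolist() is the supported spelling
values = cat.tolist()      # ['a', 'b', 'a']
values.append(cat.shape)   # the helper above also appends the shape
print(values)              # ['a', 'b', 'a', (3,)]
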
11 changes: 3 additions & 8 deletions python/xorbits/_mars/dataframe/base/accessor.py
@@ -17,12 +17,7 @@
from typing import Iterable

import pandas as pd
from pandas.api.types import (
is_datetime64_dtype,
is_datetime64tz_dtype,
is_period_dtype,
is_timedelta64_dtype,
)
from pandas.api.types import is_datetime64_dtype, is_timedelta64_dtype

from ...utils import adapt_mars_docstring
from .datetimes import SeriesDatetimeMethod, _datetime_method_to_handlers
@@ -238,9 +233,9 @@ class DatetimeAccessor:
def __init__(self, series):
if (
not is_datetime64_dtype(series.dtype)
and not is_datetime64tz_dtype(series.dtype)
and not isinstance(series.dtype, pd.DatetimeTZDtype)
and not is_timedelta64_dtype(series.dtype)
and not is_period_dtype(series.dtype)
and not isinstance(series.dtype, pd.PeriodDtype)
):
raise AttributeError("Can only use .dt accessor with datetimelike values")
self._series = series
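A note on the change above: is_datetime64tz_dtype and is_period_dtype are deprecated in pandas 2.x; isinstance checks against the dtype classes are the recommended replacement. A minimal sketch of the equivalent check on plain pandas:

import pandas as pd
from pandas.api.types import is_datetime64_dtype, is_timedelta64_dtype

s = pd.Series(pd.date_range("2024-01-01", periods=3, tz="UTC"))

# deprecated: is_datetime64tz_dtype(s.dtype), is_period_dtype(s.dtype)
# preferred: isinstance checks against pd.DatetimeTZDtype / pd.PeriodDtype
is_datetimelike = (
    is_datetime64_dtype(s.dtype)
    or isinstance(s.dtype, pd.DatetimeTZDtype)
    or is_timedelta64_dtype(s.dtype)
    or isinstance(s.dtype, pd.PeriodDtype)
)
print(is_datetimelike)  # True
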
7 changes: 4 additions & 3 deletions python/xorbits/_mars/dataframe/base/apply.py
@@ -107,9 +107,10 @@ def execute(cls, ctx, op):
**op.kwds,
)
else:
result = input_data.apply(
func, convert_dtype=op.convert_dtype, args=op.args, **op.kwds
)
if op.convert_dtype:
result = input_data.apply(func, args=op.args, **op.kwds)
else:
result = input_data.apply(func, args=op.args, **op.kwds).astype(object)
ctx[out.key] = result

@classmethod
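A note on the change above: Series.apply(..., convert_dtype=...) is deprecated since pandas 2.1, and the new branch emulates convert_dtype=False by casting the result to object. A minimal sketch on plain pandas:

import pandas as pd

s = pd.Series([1, 2, 3])

# old: s.apply(lambda x: x + 1, convert_dtype=False)  -> FutureWarning in pandas 2.1+
# new: drop the keyword and cast the result to object to keep the old behaviour
result = s.apply(lambda x: x + 1).astype(object)
print(result.dtype)  # object
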
@@ -210,7 +210,7 @@ def subtract_custom_value(x, custom_value):
).execute()
assert res.data_params["dtype"] == "object"
pd.testing.assert_series_equal(
res.fetch(), s.apply(apply_func, args=(5,), convert_dtype=False)
res.fetch(), s.apply(apply_func, args=(5,)).astype(object)
)

res = ms.apply(
@@ -220,9 +220,7 @@ def subtract_custom_value(x, custom_value):
assert res.shape == (4,)
with pytest.raises(AttributeError):
_ = res.dtypes
pd.testing.assert_series_equal(
res.fetch(), s.apply(apply_func, args=(5,), convert_dtype=True)
)
pd.testing.assert_series_equal(res.fetch(), s.apply(apply_func, args=(5,)))


def test_apply_execution_with_multi_chunks(setup):
10 changes: 5 additions & 5 deletions python/xorbits/_mars/dataframe/base/tests/test_base_execution.py
@@ -476,7 +476,7 @@ def test_series_apply_execute(setup):

r = series.apply(lambda x: [x, x + 1], convert_dtype=False)
result = r.execute().fetch()
expected = s_raw.apply(lambda x: [x, x + 1], convert_dtype=False)
expected = s_raw.apply(lambda x: [x, x + 1]).astype(object)
pd.testing.assert_series_equal(result, expected)

s_raw2 = pd.Series([np.array([1, 2, 3]), np.array([4, 5, 6])])
@@ -502,7 +502,7 @@ def closure(z):

r = series.apply(closure, convert_dtype=False)
result = r.execute().fetch()
expected = s_raw.apply(closure, convert_dtype=False)
expected = s_raw.apply(closure).astype(object)
pd.testing.assert_series_equal(result, expected)

class callable_series:
@@ -518,7 +518,7 @@ def __call__(self, z):
cs = callable_series()
r = series.apply(cs, convert_dtype=False)
result = r.execute().fetch()
expected = s_raw.apply(cs, convert_dtype=False)
expected = s_raw.apply(cs).astype(object)
pd.testing.assert_series_equal(result, expected)


@@ -528,9 +528,9 @@ def test_apply_with_arrow_dtype_execution(setup):
df1 = table.to_pandas(types_mapper=pd.ArrowDtype)
df = from_pandas_df(df1)

r = df.apply(lambda row: str(row[0]) + row[1], axis=1)
r = df.apply(lambda row: str(row.iloc[0]) + row.iloc[1], axis=1)
result = r.execute().fetch()
expected = df1.apply(lambda row: str(row[0]) + row[1], axis=1)
expected = df1.apply(lambda row: str(row.iloc[0]) + row.iloc[1], axis=1)
pd.testing.assert_series_equal(result, expected)

s1 = df1["b"]
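A note on the row.iloc change above: in pandas 2.x, Series indexing with an integer key is label-based and the positional fallback is deprecated, so .iloc makes the positional intent explicit. A minimal sketch:

import pandas as pd

row = pd.Series([1, "a"], index=["x", "y"])

# row[0] relies on the deprecated positional fallback; .iloc is explicit
first = row.iloc[0]
second = row.iloc[1]
print(str(first) + second)  # "1a"
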
@@ -599,7 +599,7 @@ def test_date_range():
with pytest.raises(ValueError):
_ = date_range(pd.NaT, periods=10)

expected = pd.date_range("2020-1-1", periods=9.0, name="date")
expected = pd.date_range("2020-1-1", periods=9, name="date")

dr = date_range("2020-1-1", periods=9.0, name="date", chunk_size=3)
assert isinstance(dr, DatetimeIndex)
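A note on the change above: non-integer values for periods are deprecated in pd.date_range (pandas 2.2), so the expected index is built with a plain int. A minimal sketch:

import pandas as pd

# periods=9.0 triggers a deprecation warning; pass an int instead
expected = pd.date_range("2020-1-1", periods=9, name="date")
print(len(expected))  # 9
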
@@ -1281,11 +1281,11 @@ def test_date_range_execution(setup):

# start, end and freq
dr = md.date_range(
"2020-1-1", "2020-1-10", freq="12H", chunk_size=chunk_size, **kw
"2020-1-1", "2020-1-10", freq="12h", chunk_size=chunk_size, **kw
)

result = dr.execute().fetch()
expected = pd.date_range("2020-1-1", "2020-1-10", freq="12H", **kw)
expected = pd.date_range("2020-1-1", "2020-1-10", freq="12h", **kw)
pd.testing.assert_index_equal(result, expected)

# test timezone
@@ -1317,15 +1317,15 @@ def test_date_range_execution(setup):
pd.testing.assert_index_equal(result, expected)

# test freq
dr = md.date_range(start="1/1/2018", periods=5, freq="M", chunk_size=3)
dr = md.date_range(start="1/1/2018", periods=5, freq="ME", chunk_size=3)

result = dr.execute().fetch()
expected = pd.date_range(start="1/1/2018", periods=5, freq="M")
expected = pd.date_range(start="1/1/2018", periods=5, freq="ME")
pd.testing.assert_index_equal(result, expected)

dr = md.date_range(start="2018/01/01", end="2018/07/01", freq="M")
dr = md.date_range(start="2018/01/01", end="2018/07/01", freq="ME")
result = dr.execute().fetch()
expected = pd.date_range(start="2018/01/01", end="2018/07/01", freq="M")
expected = pd.date_range(start="2018/01/01", end="2018/07/01", freq="ME")
pd.testing.assert_index_equal(result, expected)


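A note on the frequency changes above: pandas 2.2 renames several offset aliases, with uppercase "H" replaced by lowercase "h" and month-end "M" replaced by "ME". A minimal sketch (assumes pandas >= 2.2 for the "ME" alias):

import pandas as pd

# "12H" -> "12h": the hour alias is now lowercase
half_days = pd.date_range("2020-1-1", "2020-1-10", freq="12h")

# "M" -> "ME": the month-end alias was renamed
month_ends = pd.date_range(start="1/1/2018", periods=5, freq="ME")
print(half_days[:2], month_ends, sep="\n")
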
2 changes: 1 addition & 1 deletion python/xorbits/_mars/dataframe/indexing/index_lib.py
@@ -815,7 +815,7 @@ def _create_reorder_chunk(
reorder_indexes[-1]
]
params["columns_value"] = parse_index(reorder_columns, store_data=True)
params["dtypes"] = concat_chunk.dtypes[reorder_indexes[-1]]
params["dtypes"] = concat_chunk.dtypes.iloc[reorder_indexes[-1]]

return reorder_chunk_op.new_chunk([concat_chunk], kws=[params])

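A note on the change above: concat_chunk.dtypes is a pandas Series keyed by column label, so positional reordering should go through .iloc; integer keys in plain [] rely on the deprecated positional fallback. A minimal sketch on a plain dtypes Series:

import pandas as pd

dtypes = pd.DataFrame({"a": [1], "b": ["x"], "c": [1.0]}).dtypes

# select by position explicitly instead of dtypes[[2, 0, 1]]
reordered = dtypes.iloc[[2, 0, 1]]
print(reordered.index.tolist())  # ['c', 'a', 'b']
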
@@ -1725,6 +1725,7 @@ def test_sample_execution(setup):

def test_loc_setitem(setup):
raw_df = pd.DataFrame({"a": [1, 2, 3, 4, 2, 4, 5, 7, 2, 8, 9], 1: [10] * 11})
raw_df = raw_df.astype("object")
md_data = md.DataFrame(raw_df, chunk_size=3)
md_data.loc[md_data["a"] <= 4, 1] = "v1"
pd_data = raw_df.copy(True)
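A note on the added astype("object") line: since pandas 2.1, assigning a value of incompatible dtype through .loc (here a string into an integer column) emits a FutureWarning instead of upcasting silently, so the test casts to object first. A minimal sketch:

import pandas as pd

raw_df = pd.DataFrame({"a": [1, 2, 3, 4], 1: [10] * 4})

# without this cast, the string assignment below warns that setting an item
# of incompatible dtype is deprecated
raw_df = raw_df.astype("object")
raw_df.loc[raw_df["a"] <= 2, 1] = "v1"
print(raw_df)
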
50 changes: 26 additions & 24 deletions python/xorbits/_mars/dataframe/missing/fillna.py
@@ -89,6 +89,18 @@ def _set_inputs(self, inputs):
def output_limit(self):
return self._output_limit or 1

@staticmethod
def _apply_fillna_with_method(df, value, method, axis, limit, inplace=False):
"""
Parameter method is deprecated since version 2.1.0, use ffill or bfill instead.
"""
if method is not None:
if method in ["backfill", "bfill"]:
return df.bfill(axis=axis, limit=limit, inplace=inplace)
elif method in ["pad", "ffill"]:
return df.ffill(axis=axis, limit=limit, inplace=inplace)
return df.fillna(value=value, axis=axis, inplace=inplace)

@staticmethod
def _get_first_slice(op, df, end):
if op.method == "bfill":
@@ -115,11 +127,7 @@ def _execute_map(cls, ctx, op):
axis = op.axis
method = op.method

filled = input_data.fillna(
method=method,
axis=axis,
limit=limit,
)
filled = cls._apply_fillna_with_method(input_data, None, method, axis, limit)
ctx[op.outputs[0].key] = cls._get_first_slice(op, filled, 1)
del filled

@@ -137,15 +145,17 @@ def _execute_combine(cls, ctx, op):
summaries = [ctx[inp.key] for inp in op.inputs[1:]]

if not summaries:
ctx[op.outputs[0].key] = input_data.fillna(
method=method,
axis=axis,
limit=limit,
ctx[op.outputs[0].key] = cls._apply_fillna_with_method(
input_data, None, method, axis, limit
)
return

valid_summary = cls._get_first_slice(
op, pd.concat(summaries, axis=axis).fillna(method=method, axis=axis), 1
op,
cls._apply_fillna_with_method(
pd.concat(summaries, axis=axis), None, method, axis, limit
),
1,
)

if method == "bfill":
@@ -154,17 +164,12 @@ def _execute_combine(cls, ctx, op):
concat_df = pd.concat([valid_summary, input_data], axis=axis)

if is_pandas_2():
concat_df = concat_df.fillna(
method=method,
axis=axis,
limit=limit,
concat_df = cls._apply_fillna_with_method(
concat_df, None, method, axis, limit
)
else:
concat_df.fillna(
method=method,
axis=axis,
inplace=True,
limit=limit,
concat_df = cls._apply_fillna_with_method(
concat_df, None, method, axis, limit, inplace=True
)
ctx[op.outputs[0].key] = cls._get_first_slice(op, concat_df, -1)

@@ -180,11 +185,8 @@ def execute(cls, ctx, op):
if isinstance(op.value, ENTITY_TYPE):
value = ctx[op.value.key]
if not isinstance(input_data, pd.Index):
ctx[op.outputs[0].key] = input_data.fillna(
value=value,
method=op.method,
axis=op.axis,
limit=op.limit,
ctx[op.outputs[0].key] = cls._apply_fillna_with_method(
input_data, value, op.method, op.axis, op.limit
)
else:
ctx[op.outputs[0].key] = input_data.fillna(value=value)
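For reference, a standalone sketch of the dispatch performed by the new helper, on plain pandas objects (fillna_compat is an illustrative name, not part of the diff):

import numpy as np
import pandas as pd

def fillna_compat(obj, value=None, method=None, axis=None, limit=None):
    # route the deprecated method= values to ffill()/bfill(),
    # otherwise fall back to a plain value-based fillna()
    if method in ("backfill", "bfill"):
        return obj.bfill(axis=axis, limit=limit)
    if method in ("pad", "ffill"):
        return obj.ffill(axis=axis, limit=limit)
    return obj.fillna(value=value, axis=axis)

s = pd.Series([1.0, np.nan, np.nan, 4.0])
print(fillna_compat(s, method="ffill", limit=1))  # forward fill, at most one step
print(fillna_compat(s, value=0.0))                # plain value-based fill
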
@@ -84,11 +84,11 @@ def test_dataframe_fill_na_execution(setup):

# test forward fill in axis=0 without limit
r = df.fillna(method="pad")
pd.testing.assert_frame_equal(r.execute().fetch(), df_raw.fillna(method="pad"))
pd.testing.assert_frame_equal(r.execute().fetch(), df_raw.ffill())

# test backward fill in axis=0 without limit
r = df.fillna(method="backfill")
pd.testing.assert_frame_equal(r.execute().fetch(), df_raw.fillna(method="backfill"))
pd.testing.assert_frame_equal(r.execute().fetch(), df_raw.bfill())

# test forward fill in axis=1 without limit
r = df.ffill(axis=1)