Skip to content

Commit

Permalink
cudf groupby udf & observed
Browse files Browse the repository at this point in the history
  • Loading branch information
luweizheng committed Aug 16, 2024
1 parent a121a2d commit dbba1e2
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1079,7 +1079,6 @@ def __call__(self, s):
)


@support_cuda
@pytest.mark.parametrize(
"chunked,as_index", [(True, True), (True, False), (False, True), (False, False)]
)
Expand All @@ -1092,6 +1091,7 @@ def test_groupby_apply_as_index(chunked, as_index, setup_gpu, gpu):
}
)

# cudf does not support a UDF like this
def udf(v):
denominator = v["a"].sum() * v["a"].mean()
v = v[v["c"] == "c"]
Expand Down
4 changes: 1 addition & 3 deletions python/xorbits/_mars/dataframe/reduction/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1161,9 +1161,7 @@ def _interpret_var(v):
and isinstance(t.op.lhs, DATAFRAME_TYPE)
and isinstance(t.op.rhs, str)
):
# for a cudf dataframe, df == 'foo' doesn't work, so we convert the rhs
# to a tuple.
rhs = f"({rhs},) * len({lhs}.columns)"
rhs = f"{rhs} "
statements = [
f"try:",
f" {var_name} = {lhs}.{func_name}({rhs}, {axis_expr})",
Expand Down
4 changes: 4 additions & 0 deletions python/xorbits/_mars/lib/groupby_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,10 @@ def wrapped_groupby(
groupby_kw.pop("squeeze")
if not _HAS_DROPNA: # pragma: no branch
groupby_kw.pop("dropna")
# cudf currently does not support the `observed` argument;
# cudf defaults to True, while the pandas default is False
if is_cudf(obj): # pragma: no branch
groupby_kw["observed"] = True

groupby_obj = obj.groupby(**groupby_kw)
return GroupByWrapper(obj, groupby_obj=groupby_obj, as_index=as_index)

0 comments on commit dbba1e2

Please sign in to comment.