Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: restore original udf in the remote function test, use supported type in the test data #996

Merged
merged 9 commits into from
Oct 7, 2024
42 changes: 24 additions & 18 deletions tests/system/large/test_remote_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -1670,7 +1670,11 @@ def analyze(row):
(3, 4): ["pq", "rs", "tu"],
(5.0, "six", 7): [8, 9, 10],
'raise Exception("hacked!")': [11, 12, 13],
}
},
# The default pandas index has a non-numpy type, whereas bigframes
# always uses a numpy-based type, so let's use an index that is
# compatible with bigframes. See more details in b/369689696.
index=pandas.Index([0, 1, 2], dtype=pandas.Int64Dtype()),
),
id="all-kinds-of-column-names",
),
Expand All @@ -1681,17 +1685,22 @@ def analyze(row):
"y": [1.5, 3.75, 5],
"z": ["pq", "rs", "tu"],
},
index=pandas.MultiIndex.from_tuples(
[
("a", 100),
("a", 200),
("b", 300),
]
index=pandas.MultiIndex.from_frame(
pandas.DataFrame(
{
"idx0": pandas.Series(
["a", "a", "b"], dtype=pandas.StringDtype()
),
"idx1": pandas.Series(
[100, 200, 300], dtype=pandas.Int64Dtype()
),
}
)
),
),
id="multiindex",
marks=pytest.mark.skip(
reason="TODO(b/368639580) revert this skip after fix"
reason="TODO: revert this skip after this pandas bug is fixed: https://github.com/pandas-dev/pandas/issues/59908"
),
),
pytest.param(
Expand All @@ -1701,6 +1710,10 @@ def analyze(row):
[20, 3.75, "rs"],
[30, 8.0, "tu"],
],
# The default pandas index has a non-numpy type, whereas bigframes
# always uses a numpy-based type, so let's use an index that is
# compatible with bigframes. See more details in b/369689696.
index=pandas.Index([0, 1, 2], dtype=pandas.Int64Dtype()),
columns=pandas.MultiIndex.from_arrays(
[
["first", "last_two", "last_two"],
Expand Down Expand Up @@ -1729,10 +1742,8 @@ def test_df_apply_axis_1_complex(session, pd_df):

def serialize_row(row):
custom = {
"name": row.name.item() if hasattr(row.name, "item") else row.name,
"index": [
idx.item() if hasattr(idx, "item") else idx for idx in row.index
],
"name": row.name,
"index": [idx for idx in row.index],
"values": [
val.item() if hasattr(val, "item") else val for val in row.values
],
Expand All @@ -1756,12 +1767,7 @@ def serialize_row(row):
bf_result = bf_df.apply(serialize_row_remote, axis=1).to_pandas()
pd_result = pd_df.apply(serialize_row, axis=1)

# bf_result.dtype is 'string[pyarrow]' while pd_result.dtype is 'object';
# ignore this mismatch by using check_dtype=False.
#
# bf_result.index[0].dtype is 'string[pyarrow]' while
# pd_result.index[0].dtype is 'object', ignore this mismatch by using
# check_index_type=False.
# Ignore the known dtype differences between pandas and bigframes.
pandas.testing.assert_series_equal(
pd_result, bf_result, check_dtype=False, check_index_type=False
)
Expand Down