Skip to content

Commit

Permalink
add test
Browse files Browse the repository at this point in the history
  • Loading branch information
rjzamora committed Jan 10, 2025
1 parent 27cace3 commit f09cc81
Showing 1 changed file with 42 additions and 0 deletions.
42 changes: 42 additions & 0 deletions dask/dataframe/io/tests/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,48 @@ def test_append_dict_column(tmpdir):
assert_eq(expect, result)


@PYARROW_MARK
def test_filter_with_struct_column(tmpdir):
    """Check row filtering on a table that contains a multi-field struct column.

    A small pyarrow table with one struct column (``nested_column``) and one
    string column (``id``) is written to a single Parquet file.  The file is
    then read back with ``dd.read_parquet`` three ways: unfiltered, with a
    ``"not in"`` filter on ``id``, and with an ``"in"`` filter on ``id``.
    Each result is compared against the equivalent pandas selection.
    """
    struct_type = pa.struct([("subfield1", pa.int32()), ("subfield2", pa.int32())])
    table = pa.Table.from_arrays(
        [
            pa.array(
                [
                    {"subfield1": 10, "subfield2": 12},
                    {"subfield1": 20, "subfield2": 12},
                    {"subfield1": 30, "subfield2": 12},
                ]
            ),
            pa.array(["aa", "bb", "bb"]),
        ],
        schema=pa.schema(
            [
                ("nested_column", struct_type),
                ("id", pa.string()),
            ]
        ),
    )
    # Join onto ``tmpdir`` so the file lives inside the per-test directory.
    # Plain string concatenation (``str(tmpdir) + "file.parq"``) omits the
    # path separator and would create a sibling of the temporary directory.
    fn = str(tmpdir.join("file.parq"))
    pq.write_table(table, fn)

    pdf = table.to_pandas()

    # Unfiltered round trip must reproduce the pandas frame.
    assert_eq(dd.read_parquet(fn), pdf)

    # The filtered reads are compared without the index (check_index=False);
    # only the selected rows and their values are asserted here.
    assert_eq(
        dd.read_parquet(fn, filters=[("id", "not in", ["bb"])]),
        pdf[pdf["id"] != "bb"],
        check_index=False,
    )
    assert_eq(
        dd.read_parquet(fn, filters=[("id", "in", ["bb"])]),
        pdf[pdf["id"] == "bb"],
        check_index=False,
    )


def test_ordering(tmpdir, write_engine, read_engine):
tmp = str(tmpdir)
df = pd.DataFrame(
Expand Down

0 comments on commit f09cc81

Please sign in to comment.