From 2a8e399213d1c643c429f38e1fcb209c6550b264 Mon Sep 17 00:00:00 2001
From: Jay Chia
Date: Fri, 7 Jul 2023 17:39:37 -0700
Subject: [PATCH] Add unit test

---
Reviewer notes (this section is not part of the commit message):

 * ludwig/data/dataframe/daft.py: only the type annotation of the `expr`
   parameter of `LudwigDaftSeries.__init__` changes, from `daft.Expression`
   to `daft.expressions.Expression`. The assignment to `self._expr` is
   untouched context.
 * tests/ludwig/data/dataframe/test_daft.py (new file):
     * `df` fixture: builds a `LudwigDaftDataframe` from a dict with three
       10-element columns: "a" (ints 0..9), "b" (the string "a" repeated
       0..9 times) and "c" (square zero arrays with side 1..10).
     * `engine` fixture: parametrized, so every test that requests it runs
       once with `DaftEngine(parallelism=1)` and once with
       `DaftEngine(parallelism=2)`.
     * `test_df_like`: derives two series from the fixture frame ("a" * 2
       and "b" + "_suffix"), attaches them as "foo" and "bar" through
       `engine.df_like`, calls `engine.compute`, then asserts the column
       order is a, b, c, foo, bar and that both new columns hold the
       expected values.
 * NOTE(review): the indentation of the context lines in the daft.py hunk
   was restored from a whitespace-collapsed copy of this patch; confirm it
   against the target file before applying.

 ludwig/data/dataframe/daft.py            |  2 +-
 tests/ludwig/data/dataframe/test_daft.py | 32 ++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 tests/ludwig/data/dataframe/test_daft.py

diff --git a/ludwig/data/dataframe/daft.py b/ludwig/data/dataframe/daft.py
index cdb0cf98304..e3c54a916f6 100644
--- a/ludwig/data/dataframe/daft.py
+++ b/ludwig/data/dataframe/daft.py
@@ -64,7 +64,7 @@ class LudwigDaftSeries:
     ```
     """
 
-    def __init__(self, expr: daft.Expression):
+    def __init__(self, expr: daft.expressions.Expression):
         self._expr = expr
 
     @property
diff --git a/tests/ludwig/data/dataframe/test_daft.py b/tests/ludwig/data/dataframe/test_daft.py
new file mode 100644
index 00000000000..7bad8e8657a
--- /dev/null
+++ b/tests/ludwig/data/dataframe/test_daft.py
@@ -0,0 +1,32 @@
+import daft
+import numpy as np
+import pytest
+
+from ludwig.data.dataframe.daft import DaftEngine, LudwigDaftDataframe, LudwigDaftSeries
+
+
+@pytest.fixture(scope="function")
+def df() -> LudwigDaftDataframe:
+    data = {
+        "a": [i for i in range(10)],
+        "b": ["a" * i for i in range(10)],
+        "c": [np.zeros((i, i)) for i in range(1, 11)],
+    }
+    return LudwigDaftDataframe(daft.from_pydict(data))
+
+
+@pytest.fixture(scope="function", params=[1, 2])
+def engine(request) -> DaftEngine:
+    parallelism = request.param
+    return DaftEngine(parallelism=parallelism)
+
+
+def test_df_like(df: LudwigDaftDataframe, engine: DaftEngine):
+    s1 = LudwigDaftSeries(df["a"].expr * 2)
+    s2 = LudwigDaftSeries(df["b"].expr + "_suffix")
+    df = engine.df_like(df, {"foo": s1, "bar": s2})
+    pd_df = engine.compute(df)
+
+    assert list(pd_df.columns) == ["a", "b", "c", "foo", "bar"]
+    np.testing.assert_equal(np.array(pd_df["foo"]), np.array(pd_df["a"] * 2))
+    np.testing.assert_equal(np.array(pd_df["bar"]), np.array([item + "_suffix" for item in pd_df["b"]]))