From f559a9fd297ea11cebc727bd03aab64312af438d Mon Sep 17 00:00:00 2001 From: Aditya Nambiar Date: Mon, 26 Aug 2024 23:29:38 -0700 Subject: [PATCH] expr: Make it hashable (#540) --- docs/examples/useful-tips/debugging.py | 3 ++- fennel/expr/test_expr.py | 10 +++++++++- fennel/testing/executor.py | 1 + fennel/utils.py | 8 +++++++- pyproject.toml | 2 +- 5 files changed, 20 insertions(+), 4 deletions(-) diff --git a/docs/examples/useful-tips/debugging.py b/docs/examples/useful-tips/debugging.py index fd0fc9d68..dbe4b9322 100644 --- a/docs/examples/useful-tips/debugging.py +++ b/docs/examples/useful-tips/debugging.py @@ -5,6 +5,7 @@ import pandas as pd from fennel.testing import mock +from fennel.expr import lit __owner__ = "aditya@fennel.ai" @@ -41,7 +42,7 @@ def my_pipeline(cls, user: Dataset): schema = ds.schema() print(schema) # docsnip-highlight end - return ds.assign("country", str, lambda df: "US") + return ds.assign(country=lit("US").astype(str)) # /docsnip diff --git a/fennel/expr/test_expr.py b/fennel/expr/test_expr.py index 30b176d09..d3277659a 100644 --- a/fennel/expr/test_expr.py +++ b/fennel/expr/test_expr.py @@ -4,7 +4,7 @@ from typing import Dict from fennel.datasets import dataset -from fennel.expr import col, when +from fennel.expr import col, when, lit from fennel.expr.visitor import ExprPrinter, FetchReferences from fennel.expr.serializer import ExprSerializer from google.protobuf.json_format import ParseDict # type: ignore @@ -12,6 +12,14 @@ from fennel.testing.test_utils import error_message +def test_const_expr(): + expr = lit(1, int) + assert expr.typeof({}) == int + df = pd.DataFrame({"a": [1, 2, 3, 4]}) + df2 = expr.eval(df, {"a": int}) + assert df2.tolist() == [1, 1, 1, 1] + + def test_basic_expr1(): expr = (col("num") + col("d")).isnull() df = pd.DataFrame({"num": [1, 2, 3, 4], "d": [5, 6, 7, 8]}) diff --git a/fennel/testing/executor.py b/fennel/testing/executor.py index ccb67eca8..de1d8b7d2 100644 --- a/fennel/testing/executor.py +++ b/fennel/testing/executor.py @@ -812,6 +812,7 @@ def visitAssign(self, obj): else: input_df = copy.deepcopy(input_ret.df) df = copy.deepcopy(input_df) + df.reset_index(drop=True, inplace=True) for col, typed_expr in obj.output_expressions.items(): if col in input_ret.df.columns: raise Exception( diff --git a/fennel/utils.py b/fennel/utils.py index c27ce76c0..a3b6ea808 100644 --- a/fennel/utils.py +++ b/fennel/utils.py @@ -9,7 +9,7 @@ import sys import textwrap from typing import Any, cast, Callable, Dict, List, Tuple, Union - +from fennel.expr.expr import TypedExpr, Expr from pandas import DataFrame import fennel._vendor.astunparse as astunparse @@ -163,6 +163,12 @@ def _json_default(item: Any): if isinstance(item, datetime.timedelta): return str(item.total_seconds()) + if isinstance(item, TypedExpr): + return str(item.expr) + str(item.dtype) + + if isinstance(item, Expr): + return str(item) + raise TypeError(f"object of type {type(item).__name__} not hashable") diff --git a/pyproject.toml b/pyproject.toml index 58aa0408e..7f04d08e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "fennel-ai" -version = "1.5.8" +version = "1.5.9" description = "The modern realtime feature engineering platform" authors = ["Fennel AI "] packages = [{ include = "fennel" }]