From 92fdce4182175b043f112df1edc14da5c20a9850 Mon Sep 17 00:00:00 2001 From: Nikhil Garg Date: Thu, 19 Sep 2024 17:19:52 -0700 Subject: [PATCH] expr: add more functions, more docs, improve error messages --- docs/api.yml | 36 +- docs/examples/api-reference/expressions/dt.py | 412 ++++++++++++++++++ .../api-reference/expressions/list.py | 196 +++++++++ .../api-reference/expressions/datetime.md | 78 ++++ .../pages/api-reference/expressions/dt/day.md | 37 ++ .../api-reference/expressions/dt/hour.md | 37 ++ .../api-reference/expressions/dt/minute.md | 37 ++ .../api-reference/expressions/dt/month.md | 37 ++ .../api-reference/expressions/dt/second.md | 37 ++ .../api-reference/expressions/dt/since.md | 36 ++ .../expressions/dt/since_epoch.md | 32 ++ .../api-reference/expressions/dt/strftime.md | 44 ++ .../api-reference/expressions/dt/year.md | 37 ++ .../api-reference/expressions/from_epoch.md | 29 ++ .../api-reference/expressions/list/all.md | 30 ++ .../api-reference/expressions/list/any.md | 31 ++ .../api-reference/expressions/list/filter.md | 38 ++ .../api-reference/expressions/list/map.md | 38 ++ .../api-reference/expressions/list/max.md | 30 ++ .../api-reference/expressions/list/mean.md | 33 ++ .../api-reference/expressions/list/min.md | 30 ++ .../api-reference/expressions/list/sum.md | 30 ++ fennel/connectors/test_invalid_connectors.py | 2 +- fennel/datasets/test_invalid_dataset.py | 4 +- fennel/expr/__init__.py | 2 + fennel/expr/expr.py | 173 ++++++-- fennel/expr/serializer.py | 78 +++- fennel/expr/test_expr.py | 104 +++-- fennel/expr/test_invalid_expr.py | 10 +- fennel/expr/visitor.py | 23 + fennel/featuresets/test_invalid_featureset.py | 4 +- fennel/gen/dataset_pb2.py | 116 ++--- fennel/gen/dataset_pb2.pyi | 7 +- fennel/gen/expr_pb2.py | 210 +++++---- fennel/gen/expr_pb2.pyi | 177 +++++++- pyproject.toml | 4 +- 36 files changed, 1977 insertions(+), 282 deletions(-) create mode 100644 docs/examples/api-reference/expressions/dt.py create mode 100644 
docs/pages/api-reference/expressions/datetime.md create mode 100644 docs/pages/api-reference/expressions/dt/day.md create mode 100644 docs/pages/api-reference/expressions/dt/hour.md create mode 100644 docs/pages/api-reference/expressions/dt/minute.md create mode 100644 docs/pages/api-reference/expressions/dt/month.md create mode 100644 docs/pages/api-reference/expressions/dt/second.md create mode 100644 docs/pages/api-reference/expressions/dt/since.md create mode 100644 docs/pages/api-reference/expressions/dt/since_epoch.md create mode 100644 docs/pages/api-reference/expressions/dt/strftime.md create mode 100644 docs/pages/api-reference/expressions/dt/year.md create mode 100644 docs/pages/api-reference/expressions/from_epoch.md create mode 100644 docs/pages/api-reference/expressions/list/all.md create mode 100644 docs/pages/api-reference/expressions/list/any.md create mode 100644 docs/pages/api-reference/expressions/list/filter.md create mode 100644 docs/pages/api-reference/expressions/list/map.md create mode 100644 docs/pages/api-reference/expressions/list/max.md create mode 100644 docs/pages/api-reference/expressions/list/mean.md create mode 100644 docs/pages/api-reference/expressions/list/min.md create mode 100644 docs/pages/api-reference/expressions/list/sum.md diff --git a/docs/api.yml b/docs/api.yml index f1e3c7a17..b8139cf9f 100644 --- a/docs/api.yml +++ b/docs/api.yml @@ -88,36 +88,44 @@ sidebar: pages: - "api-reference/expressions/binary" - "api-reference/expressions/col" + - "api-reference/expressions/datetime" - "api-reference/expressions/eval" + - "api-reference/expressions/from_epoch" - "api-reference/expressions/isnull" - "api-reference/expressions/fillnull" - "api-reference/expressions/lit" - "api-reference/expressions/not" - "api-reference/expressions/typeof" - "api-reference/expressions/when" - # - "api-reference/expressions/datetime" - # - "api-reference/expressions/from_epoch" - # - slug: "api-reference/expressions/dt" - # title: "Datetime 
Expressions" - # pages: - # - "api-reference/expressions/dt.since" - # - "api-reference/expressions/dt.since_epoch" - # - "api-reference/expressions/dt.year" - # - "api-reference/expressions/dt.month" - # - "api-reference/expressions/dt.day" - # - "api-reference/expressions/dt.hour" - # - "api-reference/expressions/dt.minute" - # - "api-reference/expressions/dt.second" - # - "api-reference/expressions/dt.strftime" + - slug: "api-reference/expressions/dt" + title: "Datetime Expressions" + pages: + - "api-reference/expressions/dt/day" + - "api-reference/expressions/dt/hour" + - "api-reference/expressions/dt/minute" + - "api-reference/expressions/dt/month" + - "api-reference/expressions/dt/second" + - "api-reference/expressions/dt/since" + - "api-reference/expressions/dt/since_epoch" + - "api-reference/expressions/dt/strftime" + - "api-reference/expressions/dt/year" - slug: "api-reference/expressions/list" title: "List Expressions" pages: + - "api-reference/expressions/list/all" + - "api-reference/expressions/list/any" - "api-reference/expressions/list/at" - "api-reference/expressions/list/contains" + - "api-reference/expressions/list/filter" - "api-reference/expressions/list/hasnull" - "api-reference/expressions/list/len" + - "api-reference/expressions/list/map" + - "api-reference/expressions/list/max" + - "api-reference/expressions/list/mean" + - "api-reference/expressions/list/min" + - "api-reference/expressions/list/sum" - slug: "api-reference/expressions/num" title: "Num Expressions" diff --git a/docs/examples/api-reference/expressions/dt.py b/docs/examples/api-reference/expressions/dt.py new file mode 100644 index 000000000..206e9d0b9 --- /dev/null +++ b/docs/examples/api-reference/expressions/dt.py @@ -0,0 +1,412 @@ +import pytest +from typing import Optional, List +import pandas as pd +from datetime import datetime + + +def test_year(): + # docsnip year + from fennel.expr import col + + # docsnip-highlight next-line + expr = col("x").dt.year() + + # year works 
for any datetime type or optional datetime type + assert expr.typeof(schema={"x": datetime}) == int + assert expr.typeof(schema={"x": Optional[datetime]}) == Optional[int] + + # can be evaluated with a dataframe + df = pd.DataFrame( + { + "x": [ + pd.Timestamp("2024-01-01 00:00:00", tz="UTC"), + pd.Timestamp("2024-01-01 10:00:00", tz="UTC"), + pd.Timestamp("2024-01-01 20:20:00", tz="UTC"), + ] + } + ) + schema = {"x": datetime} + assert expr.eval(df, schema=schema).tolist() == [2024, 2024, 2024] + + # also works with timezone aware datetimes + # docsnip-highlight next-line + expr = col("x").dt.year(timezone="US/Eastern") + assert expr.eval(df, schema=schema).tolist() == [2023, 2024, 2024] + # /docsnip + + +def test_month(): + # docsnip month + from fennel.expr import col + + # docsnip-highlight next-line + expr = col("x").dt.month() + + # month works for any datetime type or optional datetime type + assert expr.typeof(schema={"x": datetime}) == int + assert expr.typeof(schema={"x": Optional[datetime]}) == Optional[int] + + # can be evaluated with a dataframe + df = pd.DataFrame( + { + "x": [ + pd.Timestamp("2024-01-01 00:00:00", tz="UTC"), + pd.Timestamp("2024-01-01 10:00:00", tz="UTC"), + pd.Timestamp("2024-01-01 20:20:00", tz="UTC"), + ] + } + ) + schema = {"x": datetime} + assert expr.eval(df, schema=schema).tolist() == [1, 1, 1] + + # also works with timezone aware datetimes + # docsnip-highlight next-line + expr = col("x").dt.month(timezone="US/Eastern") + assert expr.eval(df, schema=schema).tolist() == [12, 1, 1] + # /docsnip + + +def test_from_epoch(): + # docsnip from_epoch + from fennel.expr import col, from_epoch + + # docsnip-highlight next-line + expr = from_epoch(col("x"), unit="second") + + # from_epoch works for any int or optional int type + assert expr.typeof(schema={"x": int}) == datetime + assert expr.typeof(schema={"x": Optional[int]}) == Optional[datetime] + + # can be evaluated with a dataframe + df = pd.DataFrame({"x": [1714857600, 
1714857601, 1714857602]}) + schema = {"x": int} + expected = [ + pd.Timestamp("2024-05-04 21:20:00", tz="UTC"), + pd.Timestamp("2024-05-04 21:20:01", tz="UTC"), + pd.Timestamp("2024-05-04 21:20:02", tz="UTC"), + ] + assert expr.eval(df, schema=schema).tolist() == expected + # /docsnip + + +def test_day(): + # docsnip day + from fennel.expr import col + + # docsnip-highlight next-line + expr = col("x").dt.day() + + # day works for any datetime type or optional datetime type + assert expr.typeof(schema={"x": datetime}) == int + assert expr.typeof(schema={"x": Optional[datetime]}) == Optional[int] + + # can be evaluated with a dataframe + df = pd.DataFrame( + { + "x": [ + pd.Timestamp("2024-01-01 00:00:00", tz="UTC"), + pd.Timestamp("2024-01-01 10:00:00", tz="UTC"), + pd.Timestamp("2024-01-01 20:20:00", tz="UTC"), + ] + } + ) + schema = {"x": datetime} + assert expr.eval(df, schema=schema).tolist() == [1, 1, 1] + + # also works with timezone aware datetimes + # docsnip-highlight next-line + expr = col("x").dt.day(timezone="US/Eastern") + assert expr.eval(df, schema=schema).tolist() == [31, 1, 1] + # /docsnip + + +def test_hour(): + # docsnip hour + from fennel.expr import col + + # docsnip-highlight next-line + expr = col("x").dt.hour() + + # hour works for any datetime type or optional datetime type + assert expr.typeof(schema={"x": datetime}) == int + assert expr.typeof(schema={"x": Optional[datetime]}) == Optional[int] + + # can be evaluated with a dataframe + df = pd.DataFrame( + { + "x": [ + pd.Timestamp("2024-01-01 00:00:00", tz="UTC"), + pd.Timestamp("2024-01-01 10:00:00", tz="UTC"), + pd.Timestamp("2024-01-01 20:20:00", tz="UTC"), + ] + } + ) + schema = {"x": datetime} + assert expr.eval(df, schema=schema).tolist() == [0, 10, 20] + + # also works with timezone aware datetimes + # docsnip-highlight next-line + expr = col("x").dt.hour(timezone="US/Eastern") + assert expr.eval(df, schema=schema).tolist() == [19, 5, 15] + # /docsnip + + +def test_minute(): + # 
docsnip minute + from fennel.expr import col + + # docsnip-highlight next-line + expr = col("x").dt.minute() + + # minute works for any datetime type or optional datetime type + assert expr.typeof(schema={"x": datetime}) == int + assert expr.typeof(schema={"x": Optional[datetime]}) == Optional[int] + + # can be evaluated with a dataframe + df = pd.DataFrame( + { + "x": [ + pd.Timestamp("2024-01-01 00:00:00", tz="UTC"), + pd.Timestamp("2024-01-01 10:00:00", tz="UTC"), + pd.Timestamp("2024-01-01 20:20:00", tz="UTC"), + ] + } + ) + schema = {"x": datetime} + assert expr.eval(df, schema=schema).tolist() == [0, 0, 20] + + # also works with timezone aware datetimes + # docsnip-highlight next-line + expr = col("x").dt.minute(timezone="US/Eastern") + assert expr.eval(df, schema=schema).tolist() == [0, 0, 20] + # /docsnip + + +def test_second(): + # docsnip second + from fennel.expr import col + + # docsnip-highlight next-line + expr = col("x").dt.second() + + # second works for any datetime type or optional datetime type + assert expr.typeof(schema={"x": datetime}) == int + assert expr.typeof(schema={"x": Optional[datetime]}) == Optional[int] + + # can be evaluated with a dataframe + df = pd.DataFrame( + { + "x": [ + pd.Timestamp("2024-01-01 00:00:01", tz="UTC"), + pd.Timestamp("2024-01-01 10:00:02", tz="UTC"), + pd.Timestamp("2024-01-01 20:20:03", tz="UTC"), + ] + } + ) + schema = {"x": datetime} + assert expr.eval(df, schema=schema).tolist() == [1, 2, 3] + + # also works with timezone aware datetimes + # docsnip-highlight next-line + expr = col("x").dt.second(timezone="Asia/Kathmandu") + assert expr.eval(df, schema=schema).tolist() == [1, 2, 3] + # /docsnip + + +def test_since_epoch(): + # docsnip since_epoch + from fennel.expr import col + + # docsnip-highlight next-line + expr = col("x").dt.since_epoch() + + # since_epoch works for any datetime type or optional datetime type + assert expr.typeof(schema={"x": datetime}) == int + assert expr.typeof(schema={"x": 
Optional[datetime]}) == Optional[int] + + # can be evaluated with a dataframe + df = pd.DataFrame( + { + "x": [ + pd.Timestamp("2024-01-01 00:00:00", tz="UTC"), + pd.Timestamp("2024-01-01 10:00:00", tz="UTC"), + pd.Timestamp("2024-01-01 20:20:00", tz="UTC"), + ] + } + ) + schema = {"x": datetime} + expected = [1704067200, 1704103200, 1704140400] + assert expr.eval(df, schema=schema).tolist() == expected + + # can also change the unit of time + # docsnip-highlight next-line + expr = col("x").dt.since_epoch(unit="minute") + assert expr.eval(df, schema=schema).tolist() == [ + 28401120, + 28401720, + 28402340, + ] + # /docsnip + + expr = col("x").dt.since_epoch(unit="day") + assert expr.eval(df, schema=schema).tolist() == [ + 19723, + 19723, + 19723, + ] + + expr = col("x").dt.since_epoch(unit="hour") + assert expr.eval(df, schema=schema).tolist() == [ + 473352, + 473362, + 473372, + ] + expr = col("x").dt.since_epoch(unit="millisecond") + assert expr.eval(df, schema=schema).tolist() == [ + 1704067200000, + 1704103200000, + 1704140400000, + ] + + expr = col("x").dt.since_epoch(unit="microsecond") + assert expr.eval(df, schema=schema).tolist() == [ + 1704067200000000, + 1704103200000000, + 1704140400000000, + ] + + expr = col("x").dt.since_epoch(unit="week") + assert expr.eval(df, schema=schema).tolist() == [ + 2817, + 2817, + 2817, + ] + + with pytest.raises(ValueError): + col("x").dt.since_epoch(unit="nanosecond") + + +def test_since(): + # docsnip since + from fennel.expr import col + + # docsnip-highlight next-line + expr = col("x").dt.since(col("y")) + + # since works for any datetime type or optional datetime type + assert expr.typeof(schema={"x": datetime, "y": datetime}) == int + assert ( + expr.typeof(schema={"x": Optional[datetime], "y": datetime}) + == Optional[int] + ) + + # can be evaluated with a dataframe + df = pd.DataFrame( + { + "x": [ + pd.Timestamp("2024-01-01 00:00:00", tz="UTC"), + pd.Timestamp("2024-01-01 10:00:00", tz="UTC"), + 
pd.Timestamp("2024-01-01 20:20:00", tz="UTC"), + ], + "y": [ + pd.Timestamp("2023-01-01 00:00:00", tz="UTC"), + pd.Timestamp("2023-01-02 10:00:00", tz="UTC"), + pd.Timestamp("2023-01-03 20:20:00", tz="UTC"), + ], + } + ) + schema = {"x": datetime, "y": datetime} + expected = [31536000, 31449600, 31363200] + assert expr.eval(df, schema=schema).tolist() == expected + + # can also change the unit of time + # docsnip-highlight next-line + expr = col("x").dt.since(col("y"), unit="minute") + assert expr.eval(df, schema=schema).tolist() == [ + 525600, + 524160, + 522720, + ] + # /docsnip + + expr = col("x").dt.since(col("y"), unit="day") + assert expr.eval(df, schema=schema).tolist() == [ + 365, + 364, + 363, + ] + + expr = col("x").dt.since(col("y"), unit="hour") + assert expr.eval(df, schema=schema).tolist() == [ + 8760, + 8736, + 8712, + ] + + +def test_strftime(): + # docsnip strftime + from fennel.expr import col + + # docsnip-highlight next-line + expr = col("x").dt.strftime("%Y-%m-%d") + + # strftime works for any datetime type or optional datetime type + assert expr.typeof(schema={"x": datetime}) == str + assert expr.typeof(schema={"x": Optional[datetime]}) == Optional[str] + + # can be evaluated with a dataframe + df = pd.DataFrame( + { + "x": [ + pd.Timestamp("2024-01-01 00:00:00", tz="UTC"), + pd.Timestamp("2024-01-02 10:00:00", tz="UTC"), + pd.Timestamp("2024-01-03 20:20:00", tz="UTC"), + ] + } + ) + schema = {"x": datetime} + assert expr.eval(df, schema=schema).tolist() == [ + "2024-01-01", + "2024-01-02", + "2024-01-03", + ] + + # also works with timezone aware datetimes + # docsnip-highlight next-line + expr = col("x").dt.strftime("%Y-%m-%d", timezone="US/Eastern") + assert expr.eval(df, schema=schema).tolist() == [ + "2023-12-31", + "2024-01-02", + "2024-01-03", + ] + # /docsnip + + +def test_datetime(): + # docsnip datetime + # docsnip-highlight next-line + from fennel.expr import datetime as dt + + # docsnip-highlight next-line + expr = dt(year=2024, 
month=1, day=1) + + # datetime() builds a constant expression, so typeof needs no schema + assert expr.typeof() == datetime + + # can be evaluated with a dataframe + df = pd.DataFrame({"dummy": [1, 2, 3]}) + assert expr.eval(df, schema={"dummy": int}).tolist() == [ + pd.Timestamp("2024-01-01 00:00:00", tz="UTC"), + pd.Timestamp("2024-01-01 00:00:00", tz="UTC"), + pd.Timestamp("2024-01-01 00:00:00", tz="UTC"), + ] + # can provide timezone + # docsnip-highlight next-line + expr = dt(year=2024, month=1, day=1, timezone="US/Eastern") + assert expr.eval(df, schema={"dummy": int}).tolist() == [ + pd.Timestamp("2024-01-01 00:00:00", tz="US/Eastern"), + pd.Timestamp("2024-01-01 00:00:00", tz="US/Eastern"), + pd.Timestamp("2024-01-01 00:00:00", tz="US/Eastern"), + ] + # /docsnip diff --git a/docs/examples/api-reference/expressions/list.py b/docs/examples/api-reference/expressions/list.py index d635440f0..acb1adf6b 100644 --- a/docs/examples/api-reference/expressions/list.py +++ b/docs/examples/api-reference/expressions/list.py @@ -136,3 +136,199 @@ def test_at_negative(): schema = {"x": Optional[List[Optional[int]]], "y": int} assert expr.eval(df, schema=schema).tolist() == [3, pd.NA, 5, pd.NA] # /docsnip + + +def test_list_sum(): + # docsnip sum + from fennel.expr import col + + # docsnip-highlight next-line + expr = col("x").list.sum() + + # works for lists of int/float or their optional versions + assert expr.typeof(schema={"x": List[int]}) == int + assert expr.typeof(schema={"x": Optional[List[float]]}) == Optional[float] + + with pytest.raises(Exception): + expr.typeof(schema={"x": List[str]}) + + # can be evaluated as well + df = pd.DataFrame({"x": [[1, 2, 3], [4, 5, None], [], None]}) + schema = {"x": Optional[List[Optional[int]]]} + assert expr.eval(df, schema=schema).tolist() == [6, pd.NA, 0, pd.NA] + # /docsnip + + +def test_list_min(): + # docsnip min + from fennel.expr import col + + # docsnip-highlight next-line + expr = col("x").list.min() + + # works for lists of
int/float or their optional versions + assert expr.typeof(schema={"x": List[int]}) == Optional[int] + assert expr.typeof(schema={"x": Optional[List[float]]}) == Optional[float] + + with pytest.raises(Exception): + expr.typeof(schema={"x": List[str]}) + + # can be evaluated as well + df = pd.DataFrame({"x": [[1, 2, 3], [4, 5, None], [], None]}) + schema = {"x": Optional[List[Optional[int]]]} + assert expr.eval(df, schema=schema).tolist() == [1, pd.NA, pd.NA, pd.NA] + # /docsnip + + +def test_list_max(): + # docsnip max + from fennel.expr import col + + # docsnip-highlight next-line + expr = col("x").list.max() + + # works for lists of int/float or their optional versions + assert expr.typeof(schema={"x": List[int]}) == Optional[int] + assert expr.typeof(schema={"x": Optional[List[float]]}) == Optional[float] + + with pytest.raises(Exception): + expr.typeof(schema={"x": List[str]}) + + # can be evaluated as well + df = pd.DataFrame({"x": [[1, 2, 3], [4, 5, None], [], None]}) + schema = {"x": Optional[List[Optional[int]]]} + assert expr.eval(df, schema=schema).tolist() == [3, pd.NA, pd.NA, pd.NA] + # /docsnip + + +def test_list_mean(): + # docsnip mean + from fennel.expr import col + + # docsnip-highlight next-line + expr = col("x").list.mean() + + # works for lists of int/float or their optional versions + assert expr.typeof(schema={"x": List[int]}) == Optional[float] + assert expr.typeof(schema={"x": Optional[List[float]]}) == Optional[float] + + with pytest.raises(Exception): + expr.typeof(schema={"x": List[str]}) + + # can be evaluated as well + df = pd.DataFrame({"x": [[1, 2, 3], [4, 5, None], [], None]}) + schema = {"x": Optional[List[Optional[int]]]} + assert expr.eval(df, schema=schema).tolist() == [2.0, pd.NA, pd.NA, pd.NA] + # /docsnip + + +def test_list_all(): + # docsnip all + from fennel.expr import col + + # docsnip-highlight next-line + expr = col("x").list.all() + + # works for lists of bool or optional bool + assert
expr.typeof(schema={"x": List[bool]}) == bool + assert expr.typeof(schema={"x": List[Optional[bool]]}) == Optional[bool] + assert ( + expr.typeof(schema={"x": Optional[List[Optional[bool]]]}) + == Optional[bool] + ) + + with pytest.raises(Exception): + expr.typeof(schema={"x": List[str]}) + + # can be evaluated as well + df = pd.DataFrame( + {"x": [[True, True], [True, False], [], None, [True, None]]} + ) + schema = {"x": Optional[List[Optional[bool]]]} + assert expr.eval(df, schema=schema).tolist() == [ + True, + False, + True, + pd.NA, + pd.NA, + ] + # /docsnip + + +def test_list_any(): + # docsnip any + from fennel.expr import col + + # docsnip-highlight next-line + expr = col("x").list.any() + + # works for lists of bool or optional bool + assert expr.typeof(schema={"x": List[bool]}) == bool + assert expr.typeof(schema={"x": List[Optional[bool]]}) == Optional[bool] + assert ( + expr.typeof(schema={"x": Optional[List[Optional[bool]]]}) + == Optional[bool] + ) + + with pytest.raises(Exception): + expr.typeof(schema={"x": List[str]}) + + # can be evaluated as well + df = pd.DataFrame( + {"x": [[True, True], [True, False], [], None, [True, None]]} + ) + schema = {"x": Optional[List[Optional[bool]]]} + assert expr.eval(df, schema=schema).tolist() == [ + True, + True, + False, + pd.NA, + True, + ] + # /docsnip + + +def test_list_filter(): + # docsnip filter + from fennel.expr import col, var + + # docsnip-highlight next-line + expr = col("x").list.filter("x", var("x") % 2 == 0) + + # works as long as predicate is valid and evaluates to bool + assert expr.typeof(schema={"x": List[int]}) == List[int] + assert expr.typeof(schema={"x": List[float]}) == List[float] + + with pytest.raises(Exception): + expr.typeof(schema={"x": List[str]}) + + # can be evaluated as well + df = pd.DataFrame({"x": [[1, 2, 3], [], [1, 2, -2], None, [1, 3]]}) + schema = {"x": Optional[List[int]]} + assert expr.eval(df, schema=schema).tolist() == [ + [2], + [], + [2, -2], + pd.NA, 
+ [], + ] + # /docsnip + + +def test_list_map(): + # docsnip map + from fennel.expr import col, var + + # docsnip-highlight next-line + expr = col("x").list.map("x", var("x") % 2) + + # works as long as predicate is valid + assert expr.typeof(schema={"x": List[int]}) == List[int] + assert expr.typeof(schema={"x": List[Optional[int]]}) == List[Optional[int]] + + # can be evaluated as well + df = pd.DataFrame({"x": [[1, 2, 3], [], [1, 2, None], None, [1, 3]]}) + schema = {"x": Optional[List[Optional[int]]]} + expected = [[1, 0, 1], [], [1, 0, pd.NA], pd.NA, [1, 1]] + assert expr.eval(df, schema=schema).tolist() == expected + # /docsnip diff --git a/docs/pages/api-reference/expressions/datetime.md b/docs/pages/api-reference/expressions/datetime.md new file mode 100644 index 000000000..aa8e8d0ff --- /dev/null +++ b/docs/pages/api-reference/expressions/datetime.md @@ -0,0 +1,78 @@ +--- +title: Datetime +order: 0 +status: published +--- + +### Datetime + +Function to get a constant datetime object from its constituent parts. + +#### Parameters + +The year of the datetime. Note that this must be an integer, not an +expression denoting an integer. + + + +The month of the datetime. Note that this must be an integer, not an +expression denoting an integer. + + + +The day of the datetime. Note that this must be an integer, not an +expression denoting an integer. + + + +The hour of the datetime. Note that this must be an integer, not an +expression denoting an integer. + + + + +The minute of the datetime. Note that this must be an integer, not an +expression denoting an integer. + + + +The second of the datetime. Note that this must be an integer, not an +expression denoting an integer. + + + +The millisecond of the datetime. Note that this must be an integer, not an +expression denoting an integer. + + + +The microsecond of the datetime. Note that this must be an integer, not an +expression denoting an integer. + + + +The timezone of the datetime. 
Note that this must be a string denoting a valid +timezone, not an expression denoting a string. + + +#### Returns + +Returns an expression object denoting the datetime object. + + +
+
+ + +#### Errors + +The month must be between 1 and 12, the day must be between 1 and 31, the hour +must be between 0 and 23, the minute must be between 0 and 59, the second must be +between 0 and 59, the millisecond must be between 0 and 999, and the +microsecond must be between 0 and 999. + +Timezone, if provided, must be a valid timezone string. Note that Fennel only +supports area/location based timezones (e.g. "America/New_York"), not fixed +offsets (e.g. "+05:30" or "UTC+05:30"). + \ No newline at end of file diff --git a/docs/pages/api-reference/expressions/dt/day.md b/docs/pages/api-reference/expressions/dt/day.md new file mode 100644 index 000000000..a0a72d47a --- /dev/null +++ b/docs/pages/api-reference/expressions/dt/day.md @@ -0,0 +1,37 @@ +--- +title: Day +order: 0 +status: published +--- + +### Day + +Function to get the day component of a datetime object. + +#### Parameters + +The timezone in which to interpret the datetime. If not specified, UTC is used. + + +#### Returns + +Returns an expression object denoting the integer value of the day of the +datetime object. + + +
+
+ + +#### Errors + +The `dt` namespace must be invoked on an expression that evaluates to datetime +or optional of datetime. + + + +The timezone, if provided, must be a valid timezone string. Note that Fennel +only supports area/location based timezones (e.g. "America/New_York"), not +fixed offsets (e.g. "+05:30" or "UTC+05:30"). + \ No newline at end of file diff --git a/docs/pages/api-reference/expressions/dt/hour.md b/docs/pages/api-reference/expressions/dt/hour.md new file mode 100644 index 000000000..2eb61e5b1 --- /dev/null +++ b/docs/pages/api-reference/expressions/dt/hour.md @@ -0,0 +1,37 @@ +--- +title: Hour +order: 0 +status: published +--- + +### Hour + +Function to get the hour component of a datetime object. + +#### Parameters + +The timezone in which to interpret the datetime. If not specified, UTC is used. + + +#### Returns + +Returns an expression object denoting the integer value of the hour of the +datetime object. + + +
+
+ + +#### Errors + +The `dt` namespace must be invoked on an expression that evaluates to datetime +or optional of datetime. + + + +The timezone, if provided, must be a valid timezone string. Note that Fennel +only supports area/location based timezones (e.g. "America/New_York"), not +fixed offsets (e.g. "+05:30" or "UTC+05:30"). + \ No newline at end of file diff --git a/docs/pages/api-reference/expressions/dt/minute.md b/docs/pages/api-reference/expressions/dt/minute.md new file mode 100644 index 000000000..0e0e366b3 --- /dev/null +++ b/docs/pages/api-reference/expressions/dt/minute.md @@ -0,0 +1,37 @@ +--- +title: Minute +order: 0 +status: published +--- + +### Minute + +Function to get the minute component of a datetime object. + +#### Parameters + +The timezone in which to interpret the datetime. If not specified, UTC is used. + + +#### Returns + +Returns an expression object denoting the integer value of the minute of the +datetime object. + + +
+
+ + +#### Errors + +The `dt` namespace must be invoked on an expression that evaluates to datetime +or optional of datetime. + + + +The timezone, if provided, must be a valid timezone string. Note that Fennel +only supports area/location based timezones (e.g. "America/New_York"), not +fixed offsets (e.g. "+05:30" or "UTC+05:30"). + \ No newline at end of file diff --git a/docs/pages/api-reference/expressions/dt/month.md b/docs/pages/api-reference/expressions/dt/month.md new file mode 100644 index 000000000..d56cbd85a --- /dev/null +++ b/docs/pages/api-reference/expressions/dt/month.md @@ -0,0 +1,37 @@ +--- +title: Month +order: 0 +status: published +--- + +### Month + +Function to get the month component of a datetime object. + +#### Parameters + +The timezone in which to interpret the datetime. If not specified, UTC is used. + + +#### Returns + +Returns an expression object denoting the integer value of the month of the +datetime object. + + +
+
+ + +#### Errors + +The `dt` namespace must be invoked on an expression that evaluates to datetime +or optional of datetime. + + + +The timezone, if provided, must be a valid timezone string. Note that Fennel +only supports area/location based timezones (e.g. "America/New_York"), not +fixed offsets (e.g. "+05:30" or "UTC+05:30"). + \ No newline at end of file diff --git a/docs/pages/api-reference/expressions/dt/second.md b/docs/pages/api-reference/expressions/dt/second.md new file mode 100644 index 000000000..6cf4a35c9 --- /dev/null +++ b/docs/pages/api-reference/expressions/dt/second.md @@ -0,0 +1,37 @@ +--- +title: Second +order: 0 +status: published +--- + +### Second + +Function to get the second component of a datetime object. + +#### Parameters + +The timezone in which to interpret the datetime. If not specified, UTC is used. + + +#### Returns + +Returns an expression object denoting the integer value of the second of the +datetime object. + + +
+
+ + +#### Errors + +The `dt` namespace must be invoked on an expression that evaluates to datetime +or optional of datetime. + + + +The timezone, if provided, must be a valid timezone string. Note that Fennel +only supports area/location based timezones (e.g. "America/New_York"), not +fixed offsets (e.g. "+05:30" or "UTC+05:30"). + \ No newline at end of file diff --git a/docs/pages/api-reference/expressions/dt/since.md b/docs/pages/api-reference/expressions/dt/since.md new file mode 100644 index 000000000..9e555935c --- /dev/null +++ b/docs/pages/api-reference/expressions/dt/since.md @@ -0,0 +1,36 @@ +--- +title: Since +order: 0 +status: published +--- + +### Since + +Function to get the time elapsed between two datetime objects. + +#### Parameters + +The datetime object to calculate the elapsed time since. + + + +The unit of time to return the elapsed time in. Defaults to seconds. Valid units +are: `week`, `day`, `hour`, `minute`, `second`, `millisecond`, and `microsecond`. + + +#### Returns + +Returns an expression object denoting the integer value of the elapsed time +since the specified datetime object in the specified unit. + + +
+
+ + +#### Errors + +The `dt` namespace must be invoked on an expression that evaluates to datetime +or optional of datetime. + diff --git a/docs/pages/api-reference/expressions/dt/since_epoch.md b/docs/pages/api-reference/expressions/dt/since_epoch.md new file mode 100644 index 000000000..eae2a394b --- /dev/null +++ b/docs/pages/api-reference/expressions/dt/since_epoch.md @@ -0,0 +1,32 @@ +--- +title: Since Epoch +order: 0 +status: published +--- + +### Since Epoch + +Function to get the time elapsed since epoch for a datetime object. + +#### Parameters + +The unit of time to return the elapsed time in. Defaults to seconds. Valid units +are: `week`, `day`, `hour`, `minute`, `second`, `millisecond`, and `microsecond`. + + +#### Returns + +Returns an expression object denoting the integer value of the elapsed time +since epoch for the datetime object in the specified unit. + + +
+
+ + +#### Errors + +The `dt` namespace must be invoked on an expression that evaluates to datetime +or optional of datetime. + diff --git a/docs/pages/api-reference/expressions/dt/strftime.md b/docs/pages/api-reference/expressions/dt/strftime.md new file mode 100644 index 000000000..27c5a7c06 --- /dev/null +++ b/docs/pages/api-reference/expressions/dt/strftime.md @@ -0,0 +1,44 @@ +--- +title: Strftime +order: 0 +status: published +--- + +### Strftime + +Function to format a datetime object as a string. + +#### Parameters + +The format string to use for the datetime. + + + +The timezone in which to interpret the datetime. If not specified, UTC is used. + + +#### Returns + +Returns an expression object denoting the formatted datetime string. + + +
+
+ + +#### Errors + +The `dt` namespace must be invoked on an expression that evaluates to datetime +or optional of datetime. + + + +The format string must be a valid format string. + + + +The timezone must be a valid timezone. Note that Fennel only supports timezones +with area/location names (e.g. `America/New_York`) and not timezones with offsets +(e.g. `+05:00`). + diff --git a/docs/pages/api-reference/expressions/dt/year.md b/docs/pages/api-reference/expressions/dt/year.md new file mode 100644 index 000000000..9a5294b86 --- /dev/null +++ b/docs/pages/api-reference/expressions/dt/year.md @@ -0,0 +1,37 @@ +--- +title: Year +order: 0 +status: published +--- + +### Year + +Function to get the year component of a datetime object. + +#### Parameters + +The timezone in which to interpret the datetime. If not specified, UTC is used. + + +#### Returns + +Returns an expression object denoting the integer value of the year of the +datetime object. + + +
+
+ + +#### Errors + +The `dt` namespace must be invoked on an expression that evaluates to datetime +or optional of datetime. + + + +The timezone, if provided, must be a valid timezone string. Note that Fennel +only supports area/location based timezones (e.g. "America/New_York"), not +fixed offsets (e.g. "+05:30" or "UTC+05:30"). + \ No newline at end of file diff --git a/docs/pages/api-reference/expressions/from_epoch.md b/docs/pages/api-reference/expressions/from_epoch.md new file mode 100644 index 000000000..b8695b208 --- /dev/null +++ b/docs/pages/api-reference/expressions/from_epoch.md @@ -0,0 +1,29 @@ +--- +title: From Epoch +order: 0 +status: published +--- + +### From Epoch + +Function to get a datetime object from a unix timestamp. + +#### Parameters + +The duration (in units as specified by `unit`) since epoch to convert to a datetime +in the form of an expression denoting an integer. + + + +The unit of the `duration` parameter. Can be one of `second`, `millisecond`, +or `microsecond`. Defaults to `second`. + + +#### Returns + +Returns an expression object denoting the datetime object. + + +
+
\ No newline at end of file diff --git a/docs/pages/api-reference/expressions/list/all.md b/docs/pages/api-reference/expressions/list/all.md new file mode 100644 index 000000000..f5538ff60 --- /dev/null +++ b/docs/pages/api-reference/expressions/list/all.md @@ -0,0 +1,30 @@ +--- +title: All +order: 0 +status: published +--- + +### All + +Function to check if all the elements in a boolean list are `True`. + +#### Returns + +Returns an expression object denoting the result of the `all` operation. + +Only works when the list is of type bool or Optional[bool]. For an empty list, +returns an expression denoting `True`. If the list has one or more `None` +elements, the result becomes `None`. + + +
+
+ + +#### Errors + +The `list` namespace must be invoked on an expression that evaluates to list +or optional of list. `All` can only be invoked on lists of bools (or +optionals of bool). + diff --git a/docs/pages/api-reference/expressions/list/any.md b/docs/pages/api-reference/expressions/list/any.md new file mode 100644 index 000000000..2d9a20eda --- /dev/null +++ b/docs/pages/api-reference/expressions/list/any.md @@ -0,0 +1,31 @@ +--- +title: Any +order: 0 +status: published +--- + +### Any + +Function to check if a boolean list contains any `True` value. + +#### Returns + +Returns an expression object denoting the result of the `any` operation. + +Only works when the list is of type bool (or optional bool). For +an empty list, returns an expression denoting `False`. If the list has one or more +`None` elements, the result becomes `None` unless it also has `True` in which case +the result is still `True`. + + +
+
+ + +#### Errors + +The `list` namespace must be invoked on an expression that evaluates to list +or optional of list. `Any` can only be invoked on lists of bool (or +optionals of bool). + diff --git a/docs/pages/api-reference/expressions/list/filter.md b/docs/pages/api-reference/expressions/list/filter.md new file mode 100644 index 000000000..ed2b59ee1 --- /dev/null +++ b/docs/pages/api-reference/expressions/list/filter.md @@ -0,0 +1,38 @@ +--- +title: Filter +order: 0 +status: published +--- + +### Filter + +Function to filter a list down to elements satisfying a predicate. + +#### Parameters + +The variable name to which each element of the list should be bound +one-by-one. + + + +The predicate expression to be used to filter the list down. This must +evaluate to bool for each element of the list. Note that this expression can +refer to the element under consideration via `var(name)` where `name` is the +first argument given to the `filter` operation (see example for details). + + +#### Returns + +Returns an expression object denoting the filtered list. + + +
+
+ + +#### Errors + +The `list` namespace must be invoked on an expression that evaluates to list +or optional of list. + \ No newline at end of file diff --git a/docs/pages/api-reference/expressions/list/map.md b/docs/pages/api-reference/expressions/list/map.md new file mode 100644 index 000000000..8b99c08ca --- /dev/null +++ b/docs/pages/api-reference/expressions/list/map.md @@ -0,0 +1,38 @@ +--- +title: Map +order: 0 +status: published +--- + +### Map + +Function to map each element of a list to get another list of the same size. + +#### Parameters + +The variable name to which each element of the list should be bound +one-by-one. + + + +The expression to be used to transform each element of the list. Note that +this expression can refer to the element under consideration via `var(name)` +where `name` is the first argument given to the `map` operation (see example for +details). + + +#### Returns + +Returns an expression object denoting the transformed list. + + +
+
+ + +#### Errors + +The `list` namespace must be invoked on an expression that evaluates to list +or optional of list. + \ No newline at end of file diff --git a/docs/pages/api-reference/expressions/list/max.md b/docs/pages/api-reference/expressions/list/max.md new file mode 100644 index 000000000..ad59cc42a --- /dev/null +++ b/docs/pages/api-reference/expressions/list/max.md @@ -0,0 +1,30 @@ +--- +title: Max +order: 0 +status: published +--- + +### Max + +Function to get the maximum value of a list. + +#### Returns + +Returns an expression object denoting the max value of a list. + +Only works when the list is of type int/float (or their optional versions). For +an empty list, returns an expression denoting `None`. If the list has one or more +`None` elements, the result becomes `None`. + + +
+
+ + +#### Errors + +The `list` namespace must be invoked on an expression that evaluates to list +or optional of list. `Max` can only be invoked on lists of ints/floats (or +optionals of ints/floats). + \ No newline at end of file diff --git a/docs/pages/api-reference/expressions/list/mean.md b/docs/pages/api-reference/expressions/list/mean.md new file mode 100644 index 000000000..c00a0363a --- /dev/null +++ b/docs/pages/api-reference/expressions/list/mean.md @@ -0,0 +1,33 @@ +--- +title: Mean +order: 0 +status: published +--- + +### Mean + +Function to get the mean of the values of a list. + +#### Returns + +Returns an expression object denoting the mean value of a list. + +Only works when the list is of type int/float (or their optional versions). For +an empty list, returns an expression denoting `None`. If the list has one or more +`None` elements, the result becomes `None`. + +The output type of this expression is either `float` or `Optional[float]` depending +on the inputs. + + +
+
+ + +#### Errors + +The `list` namespace must be invoked on an expression that evaluates to list +or optional of list. `Mean` can only be invoked on lists of ints/floats (or +optionals of ints/floats). + diff --git a/docs/pages/api-reference/expressions/list/min.md b/docs/pages/api-reference/expressions/list/min.md new file mode 100644 index 000000000..6f719ac9d --- /dev/null +++ b/docs/pages/api-reference/expressions/list/min.md @@ -0,0 +1,30 @@ +--- +title: Min +order: 0 +status: published +--- + +### Min + +Function to get the min value of a list. + +#### Returns + +Returns an expression object denoting the min value of a list. + +Only works when the list is of type int/float (or their optional versions). For +an empty list, returns an expression denoting `None`. If the list has one or more +`None` elements, the result becomes `None`. + + +
+
+ + +#### Errors + +The `list` namespace must be invoked on an expression that evaluates to list +or optional of list. `Min` can only be invoked on lists of ints/floats (or +optionals of ints/floats). + \ No newline at end of file diff --git a/docs/pages/api-reference/expressions/list/sum.md b/docs/pages/api-reference/expressions/list/sum.md new file mode 100644 index 000000000..1eb946036 --- /dev/null +++ b/docs/pages/api-reference/expressions/list/sum.md @@ -0,0 +1,30 @@ +--- +title: Sum +order: 0 +status: published +--- + +### Sum + +Function to get the sum of values of a list. + +#### Returns + +Returns an expression object denoting the sum of the values of the list. + +Only works when the list is of type int/float (or their optional versions). For +an empty list, returns an expression denoting `0`. If the list has one or more +`None` elements, the whole sum becomes `None`. + + +
+
+ + +#### Errors + +The `list` namespace must be invoked on an expression that evaluates to list +or optional of list. `Sum` can only be invoked on lists of ints/floats (or +optionals of ints/floats). + \ No newline at end of file diff --git a/fennel/connectors/test_invalid_connectors.py b/fennel/connectors/test_invalid_connectors.py index c60f7a749..dc9d271c4 100644 --- a/fennel/connectors/test_invalid_connectors.py +++ b/fennel/connectors/test_invalid_connectors.py @@ -940,6 +940,6 @@ class UserInfoDataset: client.commit(datasets=[UserInfoDataset], message="test") assert ( - "`age` is of type `int` in Dataset `UserInfoDataset`, can not be cast to `float`. Full expression: `col('val1')`" + '`age` is of type `int` in Dataset `UserInfoDataset`, can not be cast to `float`. Full expression: `col("val1")`' == str(e.value) ) diff --git a/fennel/datasets/test_invalid_dataset.py b/fennel/datasets/test_invalid_dataset.py index 556e7d704..f4a3f8c9e 100644 --- a/fennel/datasets/test_invalid_dataset.py +++ b/fennel/datasets/test_invalid_dataset.py @@ -247,7 +247,7 @@ def transform(cls, rating: Dataset): .astype(int), ).drop("rating", "movie") - expected_err = "'movie_suffixed' is expected to be of type `int`, but evaluates to `str`. Full expression: `col('movie') + \"_suffix\"`" + expected_err = '\'movie_suffixed\' is expected to be of type `int`, but evaluates to `str`. 
Full expression: `col("movie") + "_suffix"`' assert expected_err in str(e.value) with pytest.raises(TypeError) as e2: @@ -295,7 +295,7 @@ def transform(cls, rating: Dataset): assert ( str(e2.value) - == """invalid assign - '[Pipeline:transform]->assign node' error in expression for column `movie_suffixed`: Failed to compile expression: invalid expression: both sides of '+' must be numeric types but found String & String, left: col(movie), right: lit(String("_suffix"))""" + == """invalid assign - '[Pipeline:transform]->assign node' error in expression for column `movie_suffixed`: Failed to compile expression: invalid expression: both sides of '+' must be numeric types but found String & String, left: col("movie"), right: lit(String("_suffix"))""" ) diff --git a/fennel/expr/__init__.py b/fennel/expr/__init__.py index 1a7e235a1..362f199b5 100644 --- a/fennel/expr/__init__.py +++ b/fennel/expr/__init__.py @@ -3,6 +3,8 @@ lit, when, make_struct, + var, + datetime, from_epoch, Expr, InvalidExprException, diff --git a/fennel/expr/expr.py b/fennel/expr/expr.py index fcf037482..c926a6902 100644 --- a/fennel/expr/expr.py +++ b/fennel/expr/expr.py @@ -430,6 +430,14 @@ def __str__(self) -> str: return f"{self.expr}" +class Var(Expr): + def __init__(self, var: str): + self.var = var + + def __str__(self) -> str: + return f"var({self.var})" + + ######################################################### # Math Functions ######################################################### @@ -694,6 +702,7 @@ def from_string(time_unit_str: str | TimeUnit) -> TimeUnit: @dataclass class DateTimeParts(DateTimeOp): part: TimeUnit + timezone: Optional[str] @dataclass @@ -710,6 +719,7 @@ class DateTimeSinceEpoch(DateTimeOp): @dataclass class DateTimeStrftime(DateTimeOp): format: str + timezone: Optional[str] @dataclass @@ -718,65 +728,67 @@ class DateTimeFromEpoch(Expr): unit: TimeUnit +@dataclass +class DateTimeLiteral(DateTimeOp): + year: int + month: int + day: int + hour: int + minute: int + 
second: int + microsecond: int + timezone: Optional[str] + + class _DateTime(Expr): def __init__(self, expr: Expr, op: DateTimeOp): self.op = op self.operand = expr super(_DateTime, self).__init__() - def parts(self, part: TimeUnit) -> _Number: + def parts(self, part: TimeUnit, timezone: Optional[str] = "UTC") -> _Number: part = TimeUnit.from_string(part) - return _Number(_DateTime(self, DateTimeParts(part)), MathNoop()) + return _Number( + _DateTime(self, DateTimeParts(part, timezone)), MathNoop() + ) - def since(self, other: Expr, unit: TimeUnit) -> _Number: + def since(self, other: Expr, unit: TimeUnit = "second") -> _Number: unit = TimeUnit.from_string(unit) other_expr = make_expr(other) return _Number( _DateTime(self, DateTimeSince(other_expr, unit)), MathNoop() ) - def since_epoch(self, unit: TimeUnit) -> _Number: + def since_epoch(self, unit: TimeUnit = "second") -> _Number: unit = TimeUnit.from_string(unit) return _Number(_DateTime(self, DateTimeSinceEpoch(unit)), MathNoop()) - def strftime(self, format: str) -> _String: - return _String(_DateTime(self, DateTimeStrftime(format)), StringNoop()) - - @property - def year(self) -> _Number: - return self.parts(TimeUnit.YEAR) - - @property - def month(self) -> _Number: - return self.parts(TimeUnit.MONTH) + def strftime(self, format: str, timezone: Optional[str] = "UTC") -> _String: + return _String( + _DateTime(self, DateTimeStrftime(format=format, timezone=timezone)), + StringNoop(), + ) - @property - def week(self) -> _Number: - return self.parts(TimeUnit.WEEK) + def year(self, timezone: Optional[str] = "UTC") -> _Number: + return self.parts(TimeUnit.YEAR, timezone) - @property - def day(self) -> _Number: - return self.parts(TimeUnit.DAY) + def month(self, timezone: Optional[str] = "UTC") -> _Number: + return self.parts(TimeUnit.MONTH, timezone) - @property - def hour(self) -> _Number: - return self.parts(TimeUnit.HOUR) + def week(self, timezone: Optional[str] = "UTC") -> _Number: + return 
self.parts(TimeUnit.WEEK, timezone) - @property - def minute(self) -> _Number: - return self.parts(TimeUnit.MINUTE) + def day(self, timezone: Optional[str] = "UTC") -> _Number: + return self.parts(TimeUnit.DAY, timezone) - @property - def second(self) -> _Number: - return self.parts(TimeUnit.SECOND) + def hour(self, timezone: Optional[str] = "UTC") -> _Number: + return self.parts(TimeUnit.HOUR, timezone) - @property - def millisecond(self) -> _Number: - return self.parts(TimeUnit.MILLISECOND) + def minute(self, timezone: Optional[str] = "UTC") -> _Number: + return self.parts(TimeUnit.MINUTE, timezone) - @property - def microsecond(self) -> _Number: - return self.parts(TimeUnit.MICROSECOND) + def second(self, timezone: Optional[str] = "UTC") -> _Number: + return self.parts(TimeUnit.SECOND, timezone) ######################################################### @@ -792,6 +804,42 @@ class ListLen(ListOp): pass +class ListSum(ListOp): + pass + + +class ListMin(ListOp): + pass + + +class ListMax(ListOp): + pass + + +class ListAll(ListOp): + pass + + +class ListAny(ListOp): + pass + + +class ListMean(ListOp): + pass + + +@dataclass +class ListFilter(ListOp): + var: str + predicate: Expr + + +@dataclass +class ListMap(ListOp): + var: str + expr: Expr + + @dataclass class ListContains(ListOp): item: Expr @@ -830,6 +878,30 @@ def at(self, index: Expr) -> Expr: def hasnull(self) -> _Bool: return _Bool(_List(self, ListHasNull())) + def sum(self) -> _Number: + return _Number(_List(self, ListSum()), MathNoop()) + + def mean(self) -> _Number: + return _Number(_List(self, ListMean()), MathNoop()) + + def min(self) -> _Number: + return _Number(_List(self, ListMin()), MathNoop()) + + def max(self) -> _Number: + return _Number(_List(self, ListMax()), MathNoop()) + + def all(self) -> _Bool: + return _Bool(_List(self, ListAll())) + + def any(self) -> _Bool: + return _Bool(_List(self, ListAny())) + + def filter(self, var: str, predicate: Expr) -> _List: + return _List(self, 
ListFilter(var=var, predicate=predicate)) + + def map(self, var: str, expr: Expr) -> _List: + return _List(self, ListMap(var=var, expr=expr)) + ####################################################### @@ -961,7 +1033,7 @@ def __init__(self, col: str): super(Ref, self).__init__() def __str__(self) -> str: - return f"col('{self._col}')" + return f'col("{self._col}")' class IsNull(Expr): @@ -1015,6 +1087,10 @@ def col(col: str) -> Expr: return Ref(col) +def var(var: str) -> Expr: + return Var(var) + + def lit(v: Any, type: Optional[Type] = None) -> Expr: # TODO: Add support for more types recursively if type is not None: @@ -1048,3 +1124,28 @@ def from_epoch(duration: Expr, unit: str | TimeUnit) -> _DateTime: duration = make_expr(duration) unit = TimeUnit.from_string(unit) return _DateTime(DateTimeFromEpoch(duration, unit), DateTimeNoop()) + + +def datetime( + year: int, + month: int, + day: int, + hour: int = 0, + minute: int = 0, + second: int = 0, + microsecond: int = 0, + timezone: Optional[str] = "UTC", +) -> _DateTime: + return _DateTime( + DateTimeLiteral( + year=year, + month=month, + day=day, + hour=hour, + minute=minute, + second=second, + microsecond=microsecond, + timezone=timezone, + ), + DateTimeNoop(), + ) diff --git a/fennel/expr/serializer.py b/fennel/expr/serializer.py index 3ad6019b9..0673ff049 100644 --- a/fennel/expr/serializer.py +++ b/fennel/expr/serializer.py @@ -17,6 +17,14 @@ ListHasNull, ListLen, ListNoop, + ListSum, + ListMean, + ListMin, + ListMax, + ListAll, + ListAny, + ListFilter, + ListMap, Literal, Ref, StructGet, @@ -60,8 +68,8 @@ def time_unit_to_proto(unit: TimeUnit) -> proto.TimeUnit: - if unit == TimeUnit.MILLISECOND: - return proto.TimeUnit.MILLISECOND + if unit == TimeUnit.MICROSECOND: + return proto.TimeUnit.MICROSECOND elif unit == TimeUnit.MILLISECOND: return proto.TimeUnit.MILLISECOND elif unit == TimeUnit.SECOND: @@ -104,6 +112,11 @@ def visitRef(self, obj): expr.ref.name = obj._col return expr + def visitVar(self, obj): + 
expr = proto.Expr() + expr.var.name = obj.var + return expr + def visitUnary(self, obj): expr = proto.Expr() if obj.op == "~": @@ -321,8 +334,14 @@ def visitDateTime(self, obj): if isinstance(obj.op, DateTimeNoop): return self.visit(obj.operand) elif isinstance(obj.op, DateTimeParts): - part = proto.Part() - part.unit = time_unit_to_proto(obj.op.part) + part = proto.Part( + unit=time_unit_to_proto(obj.op.part), + timezone=( + proto.Timezone(timezone=obj.op.timezone) + if obj.op.timezone is not None + else None + ), + ) expr.datetime_fn.fn.CopyFrom(proto.DateTimeOp(part=part)) elif isinstance(obj.op, DateTimeSince): expr.datetime_fn.fn.CopyFrom( @@ -346,6 +365,11 @@ def visitDateTime(self, obj): proto.DateTimeOp( strftime=proto.Strftime( format=obj.op.format, + timezone=( + proto.Timezone(timezone=obj.op.timezone) + if obj.op.timezone is not None + else None + ), ) ) ) @@ -376,6 +400,35 @@ def visitList(self, obj): expr.list_fn.fn.CopyFrom(proto.ListOp(len=proto.Len())) elif isinstance(obj.op, ListHasNull): expr.list_fn.fn.CopyFrom(proto.ListOp(has_null=proto.HasNull())) + elif isinstance(obj.op, ListSum): + expr.list_fn.fn.CopyFrom(proto.ListOp(sum=proto.ListSum())) + elif isinstance(obj.op, ListMean): + expr.list_fn.fn.CopyFrom(proto.ListOp(mean=proto.ListMean())) + elif isinstance(obj.op, ListMin): + expr.list_fn.fn.CopyFrom(proto.ListOp(min=proto.ListMin())) + elif isinstance(obj.op, ListMax): + expr.list_fn.fn.CopyFrom(proto.ListOp(max=proto.ListMax())) + elif isinstance(obj.op, ListAll): + expr.list_fn.fn.CopyFrom(proto.ListOp(all=proto.ListAll())) + elif isinstance(obj.op, ListAny): + expr.list_fn.fn.CopyFrom(proto.ListOp(any=proto.ListAny())) + elif isinstance(obj.op, ListFilter): + expr.list_fn.fn.CopyFrom( + proto.ListOp( + filter=proto.ListFilter( + var=obj.op.var, predicate=self.visit(obj.op.predicate) + ) + ) + ) + elif isinstance(obj.op, ListMap): + expr.list_fn.fn.CopyFrom( + proto.ListOp( + map=proto.ListMap( + var=obj.op.var, 
map_expr=self.visit(obj.op.expr) + ) + ) + ) + expr.list_fn.list.CopyFrom(self.visit(obj.expr)) return expr @@ -415,6 +468,23 @@ def visitDateTimeFromEpoch(self, obj): expr.from_epoch.CopyFrom(from_epoch) return expr + def visitDateTimeLiteral(self, obj): + expr = proto.Expr() + datetime_literal = proto.DatetimeLiteral() + datetime_literal.year = obj.year + datetime_literal.month = obj.month + datetime_literal.day = obj.day + datetime_literal.hour = obj.hour + datetime_literal.minute = obj.minute + datetime_literal.second = obj.second + datetime_literal.microsecond = obj.microsecond + if obj.timezone is not None: + datetime_literal.timezone.CopyFrom( + proto.Timezone(timezone=obj.timezone) + ) + expr.datetime_literal.CopyFrom(datetime_literal) + return expr + def val_as_json(val: Any) -> str: if isinstance(val, str): diff --git a/fennel/expr/test_expr.py b/fennel/expr/test_expr.py index 9fa7b6e78..1fe688379 100644 --- a/fennel/expr/test_expr.py +++ b/fennel/expr/test_expr.py @@ -62,7 +62,7 @@ def test_unary_expr(): def test_basic_expr2(): expr = col("a") + col("b") + 3 printer = ExprPrinter() - expected = "((col('a') + col('b')) + 3)" + expected = '((col("a") + col("b")) + 3)' assert expected == printer.print(expr.root) serializer = ExprSerializer() proto_expr = serializer.serialize(expr.root) @@ -102,7 +102,7 @@ class TestDataset: def test_math_expr(): expr = (col("a").num.floor() + 3.2).num.ceil() printer = ExprPrinter() - expected = "CEIL((FLOOR(col('a')) + 3.2))" + expected = 'CEIL((FLOOR(col("a")) + 3.2))' assert expected == printer.print(expr.root) serializer = ExprSerializer() proto_expr = serializer.serialize(expr.root) @@ -164,7 +164,7 @@ def test_math_expr(): def test_bool_expr(): expr = (col("a") == 5) | ((col("b") == "random") & (col("c") == 3.2)) printer = ExprPrinter() - expected = """((col('a') == 5) or ((col('b') == "random") and (col('c') == 3.2)))""" + expected = """((col("a") == 5) or ((col("b") == "random") and (col("c") == 3.2)))""" assert 
expected == printer.print(expr.root) df = pd.DataFrame( @@ -186,7 +186,7 @@ def test_bool_expr(): def test_str_expr(): expr = (col("a").str.concat(col("b"))).str.lower().len().ceil() printer = ExprPrinter() - expected = "CEIL(LEN(LOWER(col('a') + col('b'))))" + expected = 'CEIL(LEN(LOWER(col("a") + col("b"))))' assert expected == printer.print(expr.root) ref_extractor = FetchReferences() ref_extractor.visit(expr.root) @@ -199,7 +199,7 @@ def test_str_expr(): .then(col("b")) .otherwise("No Match") ) - expected = """WHEN CONTAINS(UPPER(col('a') + col('b')), col('c')) THEN col('b') ELSE "No Match\"""" + expected = """WHEN CONTAINS(UPPER(col("a") + col("b")), col("c")) THEN col("b") ELSE "No Match\"""" assert expected == printer.print(expr.root) ref_extractor = FetchReferences() assert ref_extractor.fetch(expr.root) == {"a", "b", "c"} @@ -232,7 +232,7 @@ def test_str_expr(): .then(col("c")) .otherwise("No Match") ) - expected = """WHEN CONTAINS(col('a'), "p") THEN col('b') WHEN CONTAINS(col('b'), "b") THEN col('a') WHEN CONTAINS(col('c'), "C") THEN col('c') ELSE "No Match\"""" + expected = """WHEN CONTAINS(col("a"), "p") THEN col("b") WHEN CONTAINS(col("b"), "b") THEN col("a") WHEN CONTAINS(col("c"), "C") THEN col("c") ELSE "No Match\"""" assert expected == printer.print(expr.root) serializer = ExprSerializer() proto_expr = serializer.serialize(expr.root) @@ -322,7 +322,7 @@ def test_dict_op(): ).dict.len() printer = ExprPrinter() expected = ( - """(CEIL((col('a').get("x") + col('a').get("y"))) + LEN(col('a')))""" + """(CEIL((col("a").get("x") + col("a").get("y"))) + LEN(col("a")))""" ) ref_extractor = FetchReferences() ref_extractor.visit(expr.root) @@ -476,7 +476,7 @@ def test_datetime_expr(): {"a": ["2021-01-01", "2021-01-02", "2021-01-03", "2021-01-04"]} ), schema={"a": str}, - display="STRPTIME(col('a'), %Y-%m-%d, UTC)", + display='STRPTIME(col("a"), %Y-%m-%d, UTC)', refs={"a"}, eval_result=[ pd.Timestamp("2021-01-01 00:00:00+0000", tz="UTC"), @@ -494,7 +494,7 @@ 
def test_datetime_expr(): {"a": ["2021-01-01", "2021-01-02", "2021-01-03", "2021-01-04"]} ), schema={"a": str}, - display="STRPTIME(col('a'), %Y-%m-%d, America/New_York)", + display='STRPTIME(col("a"), %Y-%m-%d, America/New_York)', refs={"a"}, eval_result=[ pd.Timestamp("2021-01-01 05:00:00+0000", tz="UTC"), @@ -542,13 +542,12 @@ def test_parse(): expr=(col("a").str.parse(int)), df=pd.DataFrame({"a": ["1", "2", "3", "4"]}), schema={"a": str}, - display="PARSE(col('a'), )", + display="PARSE(col(\"a\"), )", refs={"a"}, eval_result=[1, 2, 3, 4], expected_dtype=int, proto_json=None, ), - # Parse a struct ExprTestCase( expr=(col("a").str.parse(A)), df=pd.DataFrame( @@ -560,7 +559,7 @@ def test_parse(): } ), schema={"a": str}, - display="PARSE(col('a'), )", + display="PARSE(col(\"a\"), )", refs={"a"}, eval_result=[A(1, 2, "a"), A(2, 3, "b")], expected_dtype=A, @@ -571,7 +570,7 @@ def test_parse(): expr=(col("a").str.parse(List[int])), df=pd.DataFrame({"a": ["[1, 2, 3]", "[4, 5, 6]"]}), schema={"a": str}, - display="PARSE(col('a'), typing.List[int])", + display='PARSE(col("a"), typing.List[int])', refs={"a"}, eval_result=[[1, 2, 3], [4, 5, 6]], expected_dtype=List[int], @@ -588,7 +587,7 @@ def test_parse(): } ), schema={"a": str}, - display="PARSE(col('a'), )", + display="PARSE(col(\"a\"), )", refs={"a"}, eval_result=[Nested(A(1, 2, "a"), B(1, "b"), [1, 2, 3])], expected_dtype=Nested, @@ -599,7 +598,7 @@ def test_parse(): expr=(col("a").str.parse(float)), df=pd.DataFrame({"a": ["1.1", "2.2", "3.3", "4.4"]}), schema={"a": str}, - display="PARSE(col('a'), )", + display="PARSE(col(\"a\"), )", refs={"a"}, eval_result=[1.1, 2.2, 3.3, 4.4], expected_dtype=float, @@ -610,7 +609,7 @@ def test_parse(): expr=(col("a").str.parse(bool)), df=pd.DataFrame({"a": ["true", "false", "true", "false"]}), schema={"a": str}, - display="PARSE(col('a'), )", + display="PARSE(col(\"a\"), )", refs={"a"}, eval_result=[True, False, True, False], expected_dtype=bool, @@ -621,7 +620,7 @@ def 
test_parse(): expr=(col("a").str.parse(str)), df=pd.DataFrame({"a": ['"a1"', '"b"', '"c"', '"d"']}), schema={"a": str}, - display="PARSE(col('a'), )", + display="PARSE(col(\"a\"), )", refs={"a"}, eval_result=["a1", "b", "c", "d"], expected_dtype=str, @@ -713,7 +712,6 @@ def test_parse(): proto_json={}, ), ] - for case in cases: check_test_case(case) @@ -724,7 +722,7 @@ def test_list(): expr=(col("a").list.at(0)), df=pd.DataFrame({"a": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}), schema={"a": List[int]}, - display="col('a')[0]", + display='col("a")[0]', refs={"a"}, eval_result=[1, 4, 7], expected_dtype=Optional[int], @@ -745,7 +743,7 @@ def test_list(): } ), schema={"a": List[int], "b": int, "c": int}, - display="col('a')[(col('b') + col('c'))]", + display='col("a")[(col("b") + col("c"))]', refs={"a", "b", "c"}, eval_result=[2, 12, 9], expected_dtype=Optional[int], @@ -761,7 +759,7 @@ def test_list(): } ), schema={"a": List[int], "b": int}, - display="col('a')[col('b')]", + display='col("a")[col("b")]', refs={"a", "b"}, eval_result=[1, pd.NA, pd.NA], expected_dtype=Optional[int], @@ -772,7 +770,7 @@ def test_list(): expr=(~col("a").list.contains(3)), df=pd.DataFrame({"a": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}), schema={"a": List[int]}, - display="~(CONTAINS(col('a'), 3))", + display='~(CONTAINS(col("a"), 3))', refs={"a"}, eval_result=[False, True, True], expected_dtype=bool, @@ -789,7 +787,7 @@ def test_list(): } ), schema={"a": List[int], "b": int, "c": int}, - display="CONTAINS(col('a'), (col('b') * col('c')))", + display='CONTAINS(col("a"), (col("b") * col("c")))', refs={"a", "b", "c"}, eval_result=[True, True, False], expected_dtype=bool, @@ -810,7 +808,7 @@ def test_list(): } ), schema={"a2": List[str], "b2": str}, - display="""CONTAINS(col('a2'), col('b2'))""", + display="""CONTAINS(col("a2"), col("b2"))""", refs={"a2", "b2"}, eval_result=[True, True, False, False], expected_dtype=bool, @@ -827,7 +825,7 @@ def test_list(): {"a": [[A(1, 2, "a"), A(2, 3, "b"), A(4, 5, 
"c")]]} ), schema={"a": List[A]}, - display="""CONTAINS(col('a'), STRUCT(x=1, y=2, z="a"))""", + display="""CONTAINS(col("a"), STRUCT(x=1, y=2, z="a"))""", refs={"a"}, eval_result=[True], expected_dtype=bool, @@ -839,7 +837,7 @@ def test_list(): {"a": [[A(1, 2, "a"), A(2, 3, "b"), A(4, 5, "c")]]} ), schema={"a": List[A]}, - display="LEN(col('a'))", + display='LEN(col("a"))', refs={"a"}, eval_result=[3], expected_dtype=int, @@ -850,7 +848,7 @@ def test_list(): expr=(col("a").list.len()), df=pd.DataFrame({"a": [[1, 2, 3], [4, 5, 6, 12], [7, 8, 9, 19]]}), schema={"a": List[int]}, - display="LEN(col('a'))", + display='LEN(col("a"))', refs={"a"}, eval_result=[3, 4, 4], expected_dtype=int, @@ -861,7 +859,7 @@ def test_list(): expr=(col("a").list.len()), df=pd.DataFrame({"a": [[], [4, 5, 6, 12], [7, 8, 9, 19]]}), schema={"a": List[int]}, - display="LEN(col('a'))", + display='LEN(col("a"))', refs={"a"}, eval_result=[0, 4, 4], expected_dtype=int, @@ -880,7 +878,7 @@ def test_struct(): expr=(col("a").struct.get("x")), df=pd.DataFrame({"a": [A(1, 2, "a"), A(2, 3, "b"), A(4, 5, "c")]}), schema={"a": A}, - display="col('a').x", + display='col("a").x', refs={"a"}, eval_result=[1, 2, 4], expected_dtype=int, @@ -890,7 +888,7 @@ def test_struct(): expr=(col("a").struct.get("x") + col("a").struct.get("y")), df=pd.DataFrame({"a": [A(1, 2, "a"), A(2, 3, "b"), A(4, 5, "c")]}), schema={"a": A}, - display="(col('a').x + col('a').y)", + display='(col("a").x + col("a").y)', refs={"a"}, eval_result=[3, 5, 9], expected_dtype=int, @@ -906,7 +904,7 @@ def test_datetime(): cases = [ # Extract year from a datetime ExprTestCase( - expr=(col("a").dt.year), + expr=(col("a").dt.year()), df=pd.DataFrame( { "a": [ @@ -917,7 +915,7 @@ def test_datetime(): } ), schema={"a": datetime}, - display="DATEPART(col('a'), TimeUnit.YEAR)", + display='DATEPART(col("a"), TimeUnit.YEAR)', refs={"a"}, eval_result=[2021, 2021, 2021], expected_dtype=int, @@ -925,7 +923,7 @@ def test_datetime(): ), # Extract month from 
a datetime ExprTestCase( - expr=(col("a").dt.month), + expr=(col("a").dt.month()), df=pd.DataFrame( { "a": [ @@ -936,7 +934,7 @@ def test_datetime(): } ), schema={"a": datetime}, - display="DATEPART(col('a'), TimeUnit.MONTH)", + display='DATEPART(col("a"), TimeUnit.MONTH)', refs={"a"}, eval_result=[1, 2, 3], expected_dtype=int, @@ -944,7 +942,7 @@ def test_datetime(): ), # Extract week from a datetime ExprTestCase( - expr=(col("a").dt.week), + expr=(col("a").dt.week()), df=pd.DataFrame( { "a": [ @@ -955,7 +953,7 @@ def test_datetime(): } ), schema={"a": datetime}, - display="DATEPART(col('a'), TimeUnit.WEEK)", + display='DATEPART(col("a"), TimeUnit.WEEK)', refs={"a"}, eval_result=[53, 5, 9], expected_dtype=int, @@ -981,7 +979,7 @@ def test_datetime(): } ), schema={"a": datetime}, - display="""SINCE(col('a'), STRPTIME("2021-01-01 00:01:00+0000", %Y-%m-%d %H:%M:%S%z, UTC), unit=TimeUnit.DAY)""", + display="""SINCE(col("a"), STRPTIME("2021-01-01 00:01:00+0000", %Y-%m-%d %H:%M:%S%z, UTC), unit=TimeUnit.DAY)""", refs={"a"}, eval_result=[0, 32, 61], expected_dtype=int, @@ -1007,7 +1005,7 @@ def test_datetime(): } ), schema={"a": datetime}, - display="""SINCE(col('a'), STRPTIME("2021-01-01 00:01:00+0000", %Y-%m-%d %H:%M:%S%z, UTC), unit=TimeUnit.YEAR)""", + display="""SINCE(col("a"), STRPTIME("2021-01-01 00:01:00+0000", %Y-%m-%d %H:%M:%S%z, UTC), unit=TimeUnit.YEAR)""", refs={"a"}, eval_result=[0, 0, 5], expected_dtype=int, @@ -1026,7 +1024,7 @@ def test_datetime(): } ), schema={"a": datetime}, - display="SINCE_EPOCH(col('a'), unit=TimeUnit.DAY)", + display='SINCE_EPOCH(col("a"), unit=TimeUnit.DAY)', refs={"a"}, eval_result=[18628, 18660, 18689], expected_dtype=int, @@ -1045,7 +1043,7 @@ def test_datetime(): } ), schema={"a": datetime}, - display="SINCE_EPOCH(col('a'), unit=TimeUnit.YEAR)", + display='SINCE_EPOCH(col("a"), unit=TimeUnit.YEAR)', refs={"a"}, eval_result=[51, 51, 56], expected_dtype=int, @@ -1064,7 +1062,7 @@ def test_datetime(): } ), schema={"a": datetime}, 
- display="STRFTIME(col('a'), %Y-%m-%d)", + display='STRFTIME(col("a"), %Y-%m-%d)', refs={"a"}, eval_result=["2021-01-01", "2021-02-02", "2021-03-03"], expected_dtype=str, @@ -1083,7 +1081,7 @@ def test_datetime(): } ), schema={"a": datetime}, - display="STRFTIME(col('a'), %Y-%m-%d %H:%M:%S)", + display='STRFTIME(col("a"), %Y-%m-%d %H:%M:%S)', refs={"a"}, eval_result=[ "2021-01-01 00:01:00", @@ -1135,7 +1133,7 @@ def test_make_struct(): ), df=pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}), schema={"a": int, "b": int}, - display="""STRUCT(x=col('a'), y=(col('a') + col('b')), z="constant")""", + display="""STRUCT(x=col("a"), y=(col("a") + col("b")), z="constant")""", refs={"a", "b"}, eval_result=[ A(1, 5, "constant"), @@ -1177,7 +1175,7 @@ def test_make_struct(): "e": str, "f": List[int], }, - display="""STRUCT(a=STRUCT(x=col('a'), y=col('b'), z=col('c')), b=STRUCT(p=col('d'), q=col('e')), c=col('f'))""", + display="""STRUCT(a=STRUCT(x=col("a"), y=col("b"), z=col("c")), b=STRUCT(p=col("d"), q=col("e")), c=col("f"))""", refs={"a", "b", "c", "d", "e", "f"}, eval_result=[ Nested(A(1, 4, "str_1"), B(10, "a"), [1, 2, 3]), @@ -1200,7 +1198,7 @@ def test_from_epoch(): expr=(from_epoch(col("a"), unit="second")), df=pd.DataFrame({"a": [1725321570, 1725321570]}), schema={"a": int}, - display="""FROM_EPOCH(col('a'), unit=TimeUnit.SECOND)""", + display="""FROM_EPOCH(col("a"), unit=TimeUnit.SECOND)""", refs={"a"}, eval_result=[ pd.Timestamp("2024-09-02 23:59:30+0000", tz="UTC"), @@ -1214,7 +1212,7 @@ def test_from_epoch(): expr=(from_epoch(col("a") * col("b"), unit="millisecond")), df=pd.DataFrame({"a": [1725321570, 1725321570], "b": [1000, 1000]}), schema={"a": int, "b": int}, - display="""FROM_EPOCH((col('a') * col('b')), unit=TimeUnit.MILLISECOND)""", + display="""FROM_EPOCH((col("a") * col("b")), unit=TimeUnit.MILLISECOND)""", refs=set(["a", "b"]), eval_result=[ pd.Timestamp("2024-09-02 23:59:30+0000", tz="UTC"), @@ -1235,7 +1233,7 @@ def test_fillnull(): 
expr=(col("a").fillnull(0)), df=pd.DataFrame({"a": [1, 2, None, 4]}), schema={"a": Optional[int]}, - display="FILL_NULL(col('a'), 0)", + display='FILL_NULL(col("a"), 0)', refs={"a"}, eval_result=[1, 2, 0, 4], expected_dtype=int, @@ -1245,7 +1243,7 @@ def test_fillnull(): expr=(col("a").fillnull("missing")), df=pd.DataFrame({"a": ["a", "b", None, "d"]}), schema={"a": Optional[str]}, - display="FILL_NULL(col('a'), \"missing\")", + display='FILL_NULL(col("a"), "missing")', refs={"a"}, eval_result=["a", "b", "missing", "d"], expected_dtype=str, @@ -1260,7 +1258,7 @@ def test_fillnull(): ), df=pd.DataFrame({"a": ["2021-01-01", None, "2021-01-03"]}), schema={"a": Optional[str]}, - display="""FILL_NULL(STRPTIME(col('a'), %Y-%m-%d, UTC), STRPTIME("2021-01-01", %Y-%m-%d, UTC))""", + display="""FILL_NULL(STRPTIME(col("a"), %Y-%m-%d, UTC), STRPTIME("2021-01-01", %Y-%m-%d, UTC))""", refs={"a"}, eval_result=[ pd.Timestamp("2021-01-01 00:00:00+0000", tz="UTC"), @@ -1304,7 +1302,7 @@ def test_isnull(): expr=(col("a").isnull()), df=pd.DataFrame({"a": [1, 2, None, 4]}), schema={"a": Optional[int]}, - display="IS_NULL(col('a'))", + display='IS_NULL(col("a"))', refs={"a"}, eval_result=[False, False, True, False], expected_dtype=bool, @@ -1314,7 +1312,7 @@ def test_isnull(): expr=(col("a").isnull()), df=pd.DataFrame({"a": ["a", "b", None, "d"]}), schema={"a": Optional[str]}, - display="IS_NULL(col('a'))", + display='IS_NULL(col("a"))', refs={"a"}, eval_result=[False, False, True, False], expected_dtype=bool, @@ -1325,7 +1323,7 @@ def test_isnull(): expr=(col("a").isnull()), df=pd.DataFrame({"a": [A(1, 2, "a"), A(2, 3, "b"), None]}), schema={"a": Optional[A]}, - display="IS_NULL(col('a'))", + display='IS_NULL(col("a"))', refs={"a"}, eval_result=[False, False, True], expected_dtype=bool, @@ -1336,7 +1334,7 @@ def test_isnull(): expr=(col("a").isnull()), df=pd.DataFrame({"a": [[1, 2, 3], [4, 5, 6], None]}), schema={"a": Optional[List[int]]}, - display="IS_NULL(col('a'))", + 
display='IS_NULL(col("a"))', refs={"a"}, eval_result=[False, False, True], expected_dtype=bool, diff --git a/fennel/expr/test_invalid_expr.py b/fennel/expr/test_invalid_expr.py index 06904138b..a26f339dc 100644 --- a/fennel/expr/test_invalid_expr.py +++ b/fennel/expr/test_invalid_expr.py @@ -27,8 +27,8 @@ def test_invalid_datetime(): expr.eval(df, {"a": str}) assert ( - str(e.value) - == "Failed to compile expression: invalid timezone: America/NonYork" + "Failed to compile expression: invalid timezone: `America/NonYork`" + in str(e.value) ) df = pd.DataFrame( @@ -45,7 +45,7 @@ def test_invalid_datetime(): expr.eval(df, {"a": str}) assert ( str(e.value) - == 'Failed to evaluate expression: failed to eval expression: col(a).str.parse_datetime("%Y-%m-%d", timezone=""America/New_York""), error: invalid operation: conversion from `str` to `datetime[μs, America/New_York]` failed in column \'a\' for 3 out of 3 values: ["1", "2", "3"]' + == 'Failed to evaluate expression: failed to eval expression: col("a").str.parse_datetime("%Y-%m-%d", timezone="America/New_York"), error: invalid operation: conversion from `str` to `datetime[μs, America/New_York]` failed in column \'a\' for 3 out of 3 values: ["1", "2", "3"]' ) with pytest.raises(ValueError) as e: @@ -64,7 +64,7 @@ def test_missing_then(): df = pd.DataFrame({"a": [1, 2, 3]}) with pytest.raises(InvalidExprException) as e: expr.eval(df, {"a": int}) - assert str(e.value) == "THEN clause missing for WHEN clause col('a') == 1" + assert str(e.value) == 'THEN clause missing for WHEN clause col("a") == 1' with pytest.raises(AttributeError) as e: expr = when(col("a") == 1).when(col("a") == 2) @@ -88,7 +88,7 @@ def test_struct(): assert ( str(e.value) - == "invalid field access for struct, expected string but got col('b')" + == 'invalid field access for struct, expected string but got col("b")' ) diff --git a/fennel/expr/visitor.py b/fennel/expr/visitor.py index 8affab86f..5635fe7cd 100644 --- a/fennel/expr/visitor.py +++ 
b/fennel/expr/visitor.py @@ -4,6 +4,7 @@ DateTimeFromEpoch, DateTimeParts, DateTimeSince, + DateTimeLiteral, DateTimeSinceEpoch, DateTimeStrftime, ListContains, @@ -22,6 +23,7 @@ Otherwise, Binary, IsNull, + Var, FillNull, _Bool, _Dict, @@ -60,6 +62,12 @@ def visit(self, obj): elif isinstance(obj, Ref): ret = self.visitRef(obj) + elif isinstance(obj, Var): + ret = self.visitVar(obj) + + elif isinstance(obj, DateTimeLiteral): + ret = self.visitDateTimeLiteral(obj) + elif isinstance(obj, Unary): ret = self.visitUnary(obj) @@ -115,6 +123,9 @@ def visit(self, obj): def visitLiteral(self, obj): raise NotImplementedError + def visitVar(self, obj): + raise NotImplementedError + def visitRef(self, obj): raise NotImplementedError @@ -166,6 +177,9 @@ def visitMakeStruct(self, obj): def visitDateTimeFromEpoch(self, obj): raise NotImplementedError + def visitDateTimeLiteral(self, obj): + raise NotImplementedError + class ExprPrinter(Visitor): @@ -178,6 +192,9 @@ def visitLiteral(self, obj): def visitRef(self, obj): return str(obj) + def visitVar(self, obj): + return str(obj) + def visitUnary(self, obj): return "%s(%s)" % (obj.op, self.visit(obj.operand)) @@ -322,6 +339,9 @@ def visitMakeStruct(self, obj): def visitDateTimeFromEpoch(self, obj): return f"FROM_EPOCH({self.visit(obj.duration)}, unit={obj.unit})" + def visitDateTimeLiteral(self, obj): + return f"DATETIME({obj.year}, {obj.month}, {obj.day}, {obj.hour}, {obj.minute}, {obj.second}, {obj.microsecond}, timezone={obj.timezone})" + class FetchReferences(Visitor): @@ -419,3 +439,6 @@ def visitMakeStruct(self, obj): def visitDateTimeFromEpoch(self, obj): self.visit(obj.duration) + + def visitDateTimeLiteral(self, obj): + pass diff --git a/fennel/featuresets/test_invalid_featureset.py b/fennel/featuresets/test_invalid_featureset.py index 13bd08e66..a0bf9163f 100644 --- a/fennel/featuresets/test_invalid_featureset.py +++ b/fennel/featuresets/test_invalid_featureset.py @@ -420,7 +420,7 @@ class UserInfo4: assert ( str(e.value) 
- == "error in expression based extractor 'col('age') * col('age')'; can not set default value for expressions, maybe use fillnull instead?" + == 'error in expression based extractor \'col("age") * col("age")\'; can not set default value for expressions, maybe use fillnull instead?' ) # Incorrect type for an expression feature @@ -436,7 +436,7 @@ class UserInfo5: assert ( str(e.value) - == "expression 'col('age') * col('age')' for feature 'age_squared' is of type 'str' not 'int'" + == "expression 'col(\"age\") * col(\"age\")' for feature 'age_squared' is of type 'str' not 'int'" ) # Using dataset field in expression feature diff --git a/fennel/gen/dataset_pb2.py b/fennel/gen/dataset_pb2.py index 0c5a15da0..30befc88d 100644 --- a/fennel/gen/dataset_pb2.py +++ b/fennel/gen/dataset_pb2.py @@ -20,7 +20,7 @@ import fennel.gen.expr_pb2 as expr__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rdataset.proto\x12\x14\x66\x65nnel.proto.dataset\x1a\x1egoogle/protobuf/duration.proto\x1a\x0emetadata.proto\x1a\x0cpycode.proto\x1a\x0cschema.proto\x1a\nspec.proto\x1a\x0cwindow.proto\x1a\nexpr.proto\"\xe5\x03\n\x0b\x43oreDataset\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x31\n\x08metadata\x18\x02 \x01(\x0b\x32\x1f.fennel.proto.metadata.Metadata\x12/\n\x08\x64sschema\x18\x03 \x01(\x0b\x32\x1d.fennel.proto.schema.DSSchema\x12*\n\x07history\x18\x04 \x01(\x0b\x32\x19.google.protobuf.Duration\x12,\n\tretention\x18\x05 \x01(\x0b\x32\x19.google.protobuf.Duration\x12L\n\x0e\x66ield_metadata\x18\x06 \x03(\x0b\x32\x34.fennel.proto.dataset.CoreDataset.FieldMetadataEntry\x12+\n\x06pycode\x18\x07 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\x12\x19\n\x11is_source_dataset\x18\x08 \x01(\x08\x12\x0f\n\x07version\x18\t \x01(\r\x12\x0c\n\x04tags\x18\n \x03(\t\x1aU\n\x12\x46ieldMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12.\n\x05value\x18\x02 \x01(\x0b\x32\x1f.fennel.proto.metadata.Metadata:\x02\x38\x01\"Q\n\x08OnDemand\x12\x1c\n\x14\x66unction_source_code\x18\x01 
\x01(\t\x12\x10\n\x08\x66unction\x18\x02 \x01(\x0c\x12\x15\n\rexpires_after\x18\x03 \x01(\x03\"\xd2\x01\n\x08Pipeline\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0c\x64\x61taset_name\x18\x02 \x01(\t\x12\x11\n\tsignature\x18\x03 \x01(\t\x12\x31\n\x08metadata\x18\x04 \x01(\x0b\x32\x1f.fennel.proto.metadata.Metadata\x12\x1b\n\x13input_dataset_names\x18\x05 \x03(\t\x12\x12\n\nds_version\x18\x06 \x01(\r\x12+\n\x06pycode\x18\x07 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\"\x8f\x08\n\x08Operator\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0f\n\x07is_root\x18\x02 \x01(\x08\x12\x15\n\rpipeline_name\x18\x03 \x01(\t\x12\x14\n\x0c\x64\x61taset_name\x18\x04 \x01(\t\x12\x12\n\nds_version\x18\x14 \x01(\r\x12\x34\n\taggregate\x18\x05 \x01(\x0b\x32\x1f.fennel.proto.dataset.AggregateH\x00\x12*\n\x04join\x18\x06 \x01(\x0b\x32\x1a.fennel.proto.dataset.JoinH\x00\x12\x34\n\ttransform\x18\x07 \x01(\x0b\x32\x1f.fennel.proto.dataset.TransformH\x00\x12,\n\x05union\x18\x08 \x01(\x0b\x32\x1b.fennel.proto.dataset.UnionH\x00\x12.\n\x06\x66ilter\x18\t \x01(\x0b\x32\x1c.fennel.proto.dataset.FilterH\x00\x12\x37\n\x0b\x64\x61taset_ref\x18\n \x01(\x0b\x32 .fennel.proto.dataset.DatasetRefH\x00\x12.\n\x06rename\x18\x0c \x01(\x0b\x32\x1c.fennel.proto.dataset.RenameH\x00\x12*\n\x04\x64rop\x18\r \x01(\x0b\x32\x1a.fennel.proto.dataset.DropH\x00\x12\x30\n\x07\x65xplode\x18\x0e \x01(\x0b\x32\x1d.fennel.proto.dataset.ExplodeH\x00\x12,\n\x05\x64\x65\x64up\x18\x0f \x01(\x0b\x32\x1b.fennel.proto.dataset.DedupH\x00\x12,\n\x05\x66irst\x18\x10 \x01(\x0b\x32\x1b.fennel.proto.dataset.FirstH\x00\x12.\n\x06\x61ssign\x18\x11 \x01(\x0b\x32\x1c.fennel.proto.dataset.AssignH\x00\x12\x32\n\x08\x64ropnull\x18\x12 \x01(\x0b\x32\x1e.fennel.proto.dataset.DropnullH\x00\x12:\n\x06window\x18\x13 \x01(\x0b\x32(.fennel.proto.dataset.WindowOperatorKindH\x00\x12.\n\x06latest\x18\x15 \x01(\x0b\x32\x1c.fennel.proto.dataset.LatestH\x00\x12\x34\n\tchangelog\x18\x16 
\x01(\x0b\x32\x1f.fennel.proto.dataset.ChangelogH\x00\x12\x37\n\x0b\x61ssign_expr\x18\x17 \x01(\x0b\x32 .fennel.proto.dataset.AssignExprH\x00\x12\x37\n\x0b\x66ilter_expr\x18\x18 \x01(\x0b\x32 .fennel.proto.dataset.FilterExprH\x00\x12\x0c\n\x04name\x18\x0b \x01(\tB\x06\n\x04kind\"\xc7\x01\n\tAggregate\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x0c\n\x04keys\x18\x02 \x03(\t\x12)\n\x05specs\x18\x03 \x03(\x0b\x32\x1a.fennel.proto.spec.PreSpec\x12\x12\n\x05\x61long\x18\x05 \x01(\tH\x00\x88\x01\x01\x12\x39\n\remit_strategy\x18\x06 \x01(\x0e\x32\".fennel.proto.dataset.EmitStrategy\x12\x14\n\x0coperand_name\x18\x04 \x01(\tB\x08\n\x06_along\"\xb9\x03\n\x04Join\x12\x16\n\x0elhs_operand_id\x18\x01 \x01(\t\x12\x1c\n\x14rhs_dsref_operand_id\x18\x02 \x01(\t\x12.\n\x02on\x18\x03 \x03(\x0b\x32\".fennel.proto.dataset.Join.OnEntry\x12\x32\n\nwithin_low\x18\x06 \x01(\x0b\x32\x19.google.protobuf.DurationH\x00\x88\x01\x01\x12\x33\n\x0bwithin_high\x18\x07 \x01(\x0b\x32\x19.google.protobuf.DurationH\x01\x88\x01\x01\x12\x18\n\x10lhs_operand_name\x18\x04 \x01(\t\x12\x1e\n\x16rhs_dsref_operand_name\x18\x05 \x01(\t\x12+\n\x03how\x18\x08 \x01(\x0e\x32\x1e.fennel.proto.dataset.Join.How\x12\x15\n\tbroadcast\x18\t \x01(\x08\x42\x02\x18\x01\x1a)\n\x07OnEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x1a\n\x03How\x12\x08\n\x04Left\x10\x00\x12\t\n\x05Inner\x10\x01\x42\r\n\x0b_within_lowB\x0e\n\x0c_within_high\"\xed\x01\n\tTransform\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12;\n\x06schema\x18\x02 \x03(\x0b\x32+.fennel.proto.dataset.Transform.SchemaEntry\x12+\n\x06pycode\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\x12\x14\n\x0coperand_name\x18\x04 \x01(\t\x1aL\n\x0bSchemaEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12,\n\x05value\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType:\x02\x38\x01\"]\n\nFilterExpr\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12%\n\x04\x65xpr\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12\x14\n\x0coperand_name\x18\x03 
\x01(\t\"_\n\x06\x46ilter\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12+\n\x06pycode\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"\xa8\x01\n\x06\x41ssign\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12+\n\x06pycode\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\x12\x13\n\x0b\x63olumn_name\x18\x03 \x01(\t\x12\x32\n\x0boutput_type\x18\x04 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType\x12\x14\n\x0coperand_name\x18\x05 \x01(\t\"\xd5\x02\n\nAssignExpr\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12:\n\x05\x65xprs\x18\x02 \x03(\x0b\x32+.fennel.proto.dataset.AssignExpr.ExprsEntry\x12G\n\x0coutput_types\x18\x03 \x03(\x0b\x32\x31.fennel.proto.dataset.AssignExpr.OutputTypesEntry\x12\x14\n\x0coperand_name\x18\x05 \x01(\t\x1a\x45\n\nExprsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12&\n\x05value\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr:\x02\x38\x01\x1aQ\n\x10OutputTypesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12,\n\x05value\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType:\x02\x38\x01\"E\n\x08\x44ropnull\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x0f\n\x07\x63olumns\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"B\n\x04\x44rop\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x10\n\x08\x64ropcols\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"\xa5\x01\n\x06Rename\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12?\n\ncolumn_map\x18\x02 \x03(\x0b\x32+.fennel.proto.dataset.Rename.ColumnMapEntry\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\x1a\x30\n\x0e\x43olumnMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"3\n\x05Union\x12\x13\n\x0boperand_ids\x18\x01 \x03(\t\x12\x15\n\roperand_names\x18\x02 \x03(\t\"B\n\x05\x44\x65\x64up\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x0f\n\x07\x63olumns\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"D\n\x07\x45xplode\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x0f\n\x07\x63olumns\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 
\x01(\t\"=\n\x05\x46irst\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\n\n\x02\x62y\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\">\n\x06Latest\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\n\n\x02\x62y\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"L\n\tChangelog\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x15\n\rdelete_column\x18\x02 \x01(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"\xcb\x01\n\x12WindowOperatorKind\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x30\n\x0bwindow_type\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.window.Window\x12\n\n\x02\x62y\x18\x03 \x03(\t\x12\r\n\x05\x66ield\x18\x04 \x01(\t\x12\x32\n\x07summary\x18\x06 \x01(\x0b\x32\x1c.fennel.proto.window.SummaryH\x00\x88\x01\x01\x12\x14\n\x0coperand_name\x18\x05 \x01(\tB\n\n\x08_summary\",\n\nDatasetRef\x12\x1e\n\x16referring_dataset_name\x18\x01 \x01(\t\"\x80\x02\n\x08\x44\x61taflow\x12\x16\n\x0c\x64\x61taset_name\x18\x01 \x01(\tH\x00\x12L\n\x11pipeline_dataflow\x18\x02 \x01(\x0b\x32/.fennel.proto.dataset.Dataflow.PipelineDataflowH\x00\x12\x0c\n\x04tags\x18\x03 \x03(\t\x1ax\n\x10PipelineDataflow\x12\x14\n\x0c\x64\x61taset_name\x18\x01 \x01(\t\x12\x15\n\rpipeline_name\x18\x02 \x01(\t\x12\x37\n\x0finput_dataflows\x18\x03 \x03(\x0b\x32\x1e.fennel.proto.dataset.DataflowB\x06\n\x04kind\"\x9c\x01\n\x10PipelineLineages\x12\x14\n\x0c\x64\x61taset_name\x18\x01 \x01(\t\x12\x15\n\rpipeline_name\x18\x02 \x01(\t\x12=\n\x0einput_datasets\x18\x03 \x03(\x0b\x32%.fennel.proto.dataset.DatasetLineages\x12\x0e\n\x06\x61\x63tive\x18\x04 \x01(\x08\x12\x0c\n\x04tags\x18\x05 \x03(\t\"\\\n\x17\x44\x61tasetPipelineLineages\x12\x41\n\x11pipeline_lineages\x18\x02 \x03(\x0b\x32&.fennel.proto.dataset.PipelineLineages\"\x8b\x01\n\x0f\x44\x61tasetLineages\x12\x18\n\x0esource_dataset\x18\x01 \x01(\tH\x00\x12H\n\x0f\x64\x65rived_dataset\x18\x02 \x01(\x0b\x32-.fennel.proto.dataset.DatasetPipelineLineagesH\x00\x12\x0c\n\x04tags\x18\x03 
\x03(\tB\x06\n\x04kind*$\n\x0c\x45mitStrategy\x12\t\n\x05\x45\x61ger\x10\x00\x12\t\n\x05\x46inal\x10\x01\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rdataset.proto\x12\x14\x66\x65nnel.proto.dataset\x1a\x1egoogle/protobuf/duration.proto\x1a\x0emetadata.proto\x1a\x0cpycode.proto\x1a\x0cschema.proto\x1a\nspec.proto\x1a\x0cwindow.proto\x1a\nexpr.proto\"\xe5\x03\n\x0b\x43oreDataset\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x31\n\x08metadata\x18\x02 \x01(\x0b\x32\x1f.fennel.proto.metadata.Metadata\x12/\n\x08\x64sschema\x18\x03 \x01(\x0b\x32\x1d.fennel.proto.schema.DSSchema\x12*\n\x07history\x18\x04 \x01(\x0b\x32\x19.google.protobuf.Duration\x12,\n\tretention\x18\x05 \x01(\x0b\x32\x19.google.protobuf.Duration\x12L\n\x0e\x66ield_metadata\x18\x06 \x03(\x0b\x32\x34.fennel.proto.dataset.CoreDataset.FieldMetadataEntry\x12+\n\x06pycode\x18\x07 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\x12\x19\n\x11is_source_dataset\x18\x08 \x01(\x08\x12\x0f\n\x07version\x18\t \x01(\r\x12\x0c\n\x04tags\x18\n \x03(\t\x1aU\n\x12\x46ieldMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12.\n\x05value\x18\x02 \x01(\x0b\x32\x1f.fennel.proto.metadata.Metadata:\x02\x38\x01\"Q\n\x08OnDemand\x12\x1c\n\x14\x66unction_source_code\x18\x01 \x01(\t\x12\x10\n\x08\x66unction\x18\x02 \x01(\x0c\x12\x15\n\rexpires_after\x18\x03 \x01(\x03\"\xd2\x01\n\x08Pipeline\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0c\x64\x61taset_name\x18\x02 \x01(\t\x12\x11\n\tsignature\x18\x03 \x01(\t\x12\x31\n\x08metadata\x18\x04 \x01(\x0b\x32\x1f.fennel.proto.metadata.Metadata\x12\x1b\n\x13input_dataset_names\x18\x05 \x03(\t\x12\x12\n\nds_version\x18\x06 \x01(\r\x12+\n\x06pycode\x18\x07 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\"\x8f\x08\n\x08Operator\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0f\n\x07is_root\x18\x02 \x01(\x08\x12\x15\n\rpipeline_name\x18\x03 \x01(\t\x12\x14\n\x0c\x64\x61taset_name\x18\x04 \x01(\t\x12\x12\n\nds_version\x18\x14 \x01(\r\x12\x34\n\taggregate\x18\x05 
\x01(\x0b\x32\x1f.fennel.proto.dataset.AggregateH\x00\x12*\n\x04join\x18\x06 \x01(\x0b\x32\x1a.fennel.proto.dataset.JoinH\x00\x12\x34\n\ttransform\x18\x07 \x01(\x0b\x32\x1f.fennel.proto.dataset.TransformH\x00\x12,\n\x05union\x18\x08 \x01(\x0b\x32\x1b.fennel.proto.dataset.UnionH\x00\x12.\n\x06\x66ilter\x18\t \x01(\x0b\x32\x1c.fennel.proto.dataset.FilterH\x00\x12\x37\n\x0b\x64\x61taset_ref\x18\n \x01(\x0b\x32 .fennel.proto.dataset.DatasetRefH\x00\x12.\n\x06rename\x18\x0c \x01(\x0b\x32\x1c.fennel.proto.dataset.RenameH\x00\x12*\n\x04\x64rop\x18\r \x01(\x0b\x32\x1a.fennel.proto.dataset.DropH\x00\x12\x30\n\x07\x65xplode\x18\x0e \x01(\x0b\x32\x1d.fennel.proto.dataset.ExplodeH\x00\x12,\n\x05\x64\x65\x64up\x18\x0f \x01(\x0b\x32\x1b.fennel.proto.dataset.DedupH\x00\x12,\n\x05\x66irst\x18\x10 \x01(\x0b\x32\x1b.fennel.proto.dataset.FirstH\x00\x12.\n\x06\x61ssign\x18\x11 \x01(\x0b\x32\x1c.fennel.proto.dataset.AssignH\x00\x12\x32\n\x08\x64ropnull\x18\x12 \x01(\x0b\x32\x1e.fennel.proto.dataset.DropnullH\x00\x12:\n\x06window\x18\x13 \x01(\x0b\x32(.fennel.proto.dataset.WindowOperatorKindH\x00\x12.\n\x06latest\x18\x15 \x01(\x0b\x32\x1c.fennel.proto.dataset.LatestH\x00\x12\x34\n\tchangelog\x18\x16 \x01(\x0b\x32\x1f.fennel.proto.dataset.ChangelogH\x00\x12\x37\n\x0b\x61ssign_expr\x18\x17 \x01(\x0b\x32 .fennel.proto.dataset.AssignExprH\x00\x12\x37\n\x0b\x66ilter_expr\x18\x18 \x01(\x0b\x32 .fennel.proto.dataset.FilterExprH\x00\x12\x0c\n\x04name\x18\x0b \x01(\tB\x06\n\x04kind\"\xc7\x01\n\tAggregate\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x0c\n\x04keys\x18\x02 \x03(\t\x12)\n\x05specs\x18\x03 \x03(\x0b\x32\x1a.fennel.proto.spec.PreSpec\x12\x12\n\x05\x61long\x18\x05 \x01(\tH\x00\x88\x01\x01\x12\x39\n\remit_strategy\x18\x06 \x01(\x0e\x32\".fennel.proto.dataset.EmitStrategy\x12\x14\n\x0coperand_name\x18\x04 \x01(\tB\x08\n\x06_along\"\xcd\x03\n\x04Join\x12\x16\n\x0elhs_operand_id\x18\x01 \x01(\t\x12\x1c\n\x14rhs_dsref_operand_id\x18\x02 \x01(\t\x12.\n\x02on\x18\x03 
\x03(\x0b\x32\".fennel.proto.dataset.Join.OnEntry\x12\x32\n\nwithin_low\x18\x06 \x01(\x0b\x32\x19.google.protobuf.DurationH\x00\x88\x01\x01\x12\x33\n\x0bwithin_high\x18\x07 \x01(\x0b\x32\x19.google.protobuf.DurationH\x01\x88\x01\x01\x12\x18\n\x10lhs_operand_name\x18\x04 \x01(\t\x12\x1e\n\x16rhs_dsref_operand_name\x18\x05 \x01(\t\x12+\n\x03how\x18\x08 \x01(\x0e\x32\x1e.fennel.proto.dataset.Join.How\x12\x15\n\tbroadcast\x18\t \x01(\x08\x42\x02\x18\x01\x12\x12\n\nrhs_fields\x18\n \x03(\t\x1a)\n\x07OnEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x1a\n\x03How\x12\x08\n\x04Left\x10\x00\x12\t\n\x05Inner\x10\x01\x42\r\n\x0b_within_lowB\x0e\n\x0c_within_high\"\xed\x01\n\tTransform\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12;\n\x06schema\x18\x02 \x03(\x0b\x32+.fennel.proto.dataset.Transform.SchemaEntry\x12+\n\x06pycode\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\x12\x14\n\x0coperand_name\x18\x04 \x01(\t\x1aL\n\x0bSchemaEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12,\n\x05value\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType:\x02\x38\x01\"]\n\nFilterExpr\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12%\n\x04\x65xpr\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"_\n\x06\x46ilter\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12+\n\x06pycode\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"\xa8\x01\n\x06\x41ssign\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12+\n\x06pycode\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\x12\x13\n\x0b\x63olumn_name\x18\x03 \x01(\t\x12\x32\n\x0boutput_type\x18\x04 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType\x12\x14\n\x0coperand_name\x18\x05 \x01(\t\"\xd5\x02\n\nAssignExpr\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12:\n\x05\x65xprs\x18\x02 \x03(\x0b\x32+.fennel.proto.dataset.AssignExpr.ExprsEntry\x12G\n\x0coutput_types\x18\x03 
\x03(\x0b\x32\x31.fennel.proto.dataset.AssignExpr.OutputTypesEntry\x12\x14\n\x0coperand_name\x18\x05 \x01(\t\x1a\x45\n\nExprsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12&\n\x05value\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr:\x02\x38\x01\x1aQ\n\x10OutputTypesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12,\n\x05value\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType:\x02\x38\x01\"E\n\x08\x44ropnull\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x0f\n\x07\x63olumns\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"B\n\x04\x44rop\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x10\n\x08\x64ropcols\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"\xa5\x01\n\x06Rename\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12?\n\ncolumn_map\x18\x02 \x03(\x0b\x32+.fennel.proto.dataset.Rename.ColumnMapEntry\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\x1a\x30\n\x0e\x43olumnMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"3\n\x05Union\x12\x13\n\x0boperand_ids\x18\x01 \x03(\t\x12\x15\n\roperand_names\x18\x02 \x03(\t\"B\n\x05\x44\x65\x64up\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x0f\n\x07\x63olumns\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"D\n\x07\x45xplode\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x0f\n\x07\x63olumns\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"=\n\x05\x46irst\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\n\n\x02\x62y\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\">\n\x06Latest\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\n\n\x02\x62y\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"L\n\tChangelog\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x15\n\rdelete_column\x18\x02 \x01(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"\xcb\x01\n\x12WindowOperatorKind\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x30\n\x0bwindow_type\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.window.Window\x12\n\n\x02\x62y\x18\x03 \x03(\t\x12\r\n\x05\x66ield\x18\x04 \x01(\t\x12\x32\n\x07summary\x18\x06 
\x01(\x0b\x32\x1c.fennel.proto.window.SummaryH\x00\x88\x01\x01\x12\x14\n\x0coperand_name\x18\x05 \x01(\tB\n\n\x08_summary\",\n\nDatasetRef\x12\x1e\n\x16referring_dataset_name\x18\x01 \x01(\t\"\x80\x02\n\x08\x44\x61taflow\x12\x16\n\x0c\x64\x61taset_name\x18\x01 \x01(\tH\x00\x12L\n\x11pipeline_dataflow\x18\x02 \x01(\x0b\x32/.fennel.proto.dataset.Dataflow.PipelineDataflowH\x00\x12\x0c\n\x04tags\x18\x03 \x03(\t\x1ax\n\x10PipelineDataflow\x12\x14\n\x0c\x64\x61taset_name\x18\x01 \x01(\t\x12\x15\n\rpipeline_name\x18\x02 \x01(\t\x12\x37\n\x0finput_dataflows\x18\x03 \x03(\x0b\x32\x1e.fennel.proto.dataset.DataflowB\x06\n\x04kind\"\x9c\x01\n\x10PipelineLineages\x12\x14\n\x0c\x64\x61taset_name\x18\x01 \x01(\t\x12\x15\n\rpipeline_name\x18\x02 \x01(\t\x12=\n\x0einput_datasets\x18\x03 \x03(\x0b\x32%.fennel.proto.dataset.DatasetLineages\x12\x0e\n\x06\x61\x63tive\x18\x04 \x01(\x08\x12\x0c\n\x04tags\x18\x05 \x03(\t\"\\\n\x17\x44\x61tasetPipelineLineages\x12\x41\n\x11pipeline_lineages\x18\x02 \x03(\x0b\x32&.fennel.proto.dataset.PipelineLineages\"\x8b\x01\n\x0f\x44\x61tasetLineages\x12\x18\n\x0esource_dataset\x18\x01 \x01(\tH\x00\x12H\n\x0f\x64\x65rived_dataset\x18\x02 \x01(\x0b\x32-.fennel.proto.dataset.DatasetPipelineLineagesH\x00\x12\x0c\n\x04tags\x18\x03 \x03(\tB\x06\n\x04kind*$\n\x0c\x45mitStrategy\x12\t\n\x05\x45\x61ger\x10\x00\x12\t\n\x05\x46inal\x10\x01\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -41,8 +41,8 @@ _globals['_ASSIGNEXPR_OUTPUTTYPESENTRY']._serialized_options = b'8\001' _globals['_RENAME_COLUMNMAPENTRY']._options = None _globals['_RENAME_COLUMNMAPENTRY']._serialized_options = b'8\001' - _globals['_EMITSTRATEGY']._serialized_start=5181 - _globals['_EMITSTRATEGY']._serialized_end=5217 + _globals['_EMITSTRATEGY']._serialized_start=5201 + _globals['_EMITSTRATEGY']._serialized_end=5237 _globals['_COREDATASET']._serialized_start=154 _globals['_COREDATASET']._serialized_end=639 
_globals['_COREDATASET_FIELDMETADATAENTRY']._serialized_start=554 @@ -56,59 +56,59 @@ _globals['_AGGREGATE']._serialized_start=1980 _globals['_AGGREGATE']._serialized_end=2179 _globals['_JOIN']._serialized_start=2182 - _globals['_JOIN']._serialized_end=2623 - _globals['_JOIN_ONENTRY']._serialized_start=2523 - _globals['_JOIN_ONENTRY']._serialized_end=2564 - _globals['_JOIN_HOW']._serialized_start=2566 - _globals['_JOIN_HOW']._serialized_end=2592 - _globals['_TRANSFORM']._serialized_start=2626 - _globals['_TRANSFORM']._serialized_end=2863 - _globals['_TRANSFORM_SCHEMAENTRY']._serialized_start=2787 - _globals['_TRANSFORM_SCHEMAENTRY']._serialized_end=2863 - _globals['_FILTEREXPR']._serialized_start=2865 - _globals['_FILTEREXPR']._serialized_end=2958 - _globals['_FILTER']._serialized_start=2960 - _globals['_FILTER']._serialized_end=3055 - _globals['_ASSIGN']._serialized_start=3058 - _globals['_ASSIGN']._serialized_end=3226 - _globals['_ASSIGNEXPR']._serialized_start=3229 - _globals['_ASSIGNEXPR']._serialized_end=3570 - _globals['_ASSIGNEXPR_EXPRSENTRY']._serialized_start=3418 - _globals['_ASSIGNEXPR_EXPRSENTRY']._serialized_end=3487 - _globals['_ASSIGNEXPR_OUTPUTTYPESENTRY']._serialized_start=3489 - _globals['_ASSIGNEXPR_OUTPUTTYPESENTRY']._serialized_end=3570 - _globals['_DROPNULL']._serialized_start=3572 - _globals['_DROPNULL']._serialized_end=3641 - _globals['_DROP']._serialized_start=3643 - _globals['_DROP']._serialized_end=3709 - _globals['_RENAME']._serialized_start=3712 - _globals['_RENAME']._serialized_end=3877 - _globals['_RENAME_COLUMNMAPENTRY']._serialized_start=3829 - _globals['_RENAME_COLUMNMAPENTRY']._serialized_end=3877 - _globals['_UNION']._serialized_start=3879 - _globals['_UNION']._serialized_end=3930 - _globals['_DEDUP']._serialized_start=3932 - _globals['_DEDUP']._serialized_end=3998 - _globals['_EXPLODE']._serialized_start=4000 - _globals['_EXPLODE']._serialized_end=4068 - _globals['_FIRST']._serialized_start=4070 - 
_globals['_FIRST']._serialized_end=4131 - _globals['_LATEST']._serialized_start=4133 - _globals['_LATEST']._serialized_end=4195 - _globals['_CHANGELOG']._serialized_start=4197 - _globals['_CHANGELOG']._serialized_end=4273 - _globals['_WINDOWOPERATORKIND']._serialized_start=4276 - _globals['_WINDOWOPERATORKIND']._serialized_end=4479 - _globals['_DATASETREF']._serialized_start=4481 - _globals['_DATASETREF']._serialized_end=4525 - _globals['_DATAFLOW']._serialized_start=4528 - _globals['_DATAFLOW']._serialized_end=4784 - _globals['_DATAFLOW_PIPELINEDATAFLOW']._serialized_start=4656 - _globals['_DATAFLOW_PIPELINEDATAFLOW']._serialized_end=4776 - _globals['_PIPELINELINEAGES']._serialized_start=4787 - _globals['_PIPELINELINEAGES']._serialized_end=4943 - _globals['_DATASETPIPELINELINEAGES']._serialized_start=4945 - _globals['_DATASETPIPELINELINEAGES']._serialized_end=5037 - _globals['_DATASETLINEAGES']._serialized_start=5040 - _globals['_DATASETLINEAGES']._serialized_end=5179 + _globals['_JOIN']._serialized_end=2643 + _globals['_JOIN_ONENTRY']._serialized_start=2543 + _globals['_JOIN_ONENTRY']._serialized_end=2584 + _globals['_JOIN_HOW']._serialized_start=2586 + _globals['_JOIN_HOW']._serialized_end=2612 + _globals['_TRANSFORM']._serialized_start=2646 + _globals['_TRANSFORM']._serialized_end=2883 + _globals['_TRANSFORM_SCHEMAENTRY']._serialized_start=2807 + _globals['_TRANSFORM_SCHEMAENTRY']._serialized_end=2883 + _globals['_FILTEREXPR']._serialized_start=2885 + _globals['_FILTEREXPR']._serialized_end=2978 + _globals['_FILTER']._serialized_start=2980 + _globals['_FILTER']._serialized_end=3075 + _globals['_ASSIGN']._serialized_start=3078 + _globals['_ASSIGN']._serialized_end=3246 + _globals['_ASSIGNEXPR']._serialized_start=3249 + _globals['_ASSIGNEXPR']._serialized_end=3590 + _globals['_ASSIGNEXPR_EXPRSENTRY']._serialized_start=3438 + _globals['_ASSIGNEXPR_EXPRSENTRY']._serialized_end=3507 + _globals['_ASSIGNEXPR_OUTPUTTYPESENTRY']._serialized_start=3509 + 
_globals['_ASSIGNEXPR_OUTPUTTYPESENTRY']._serialized_end=3590 + _globals['_DROPNULL']._serialized_start=3592 + _globals['_DROPNULL']._serialized_end=3661 + _globals['_DROP']._serialized_start=3663 + _globals['_DROP']._serialized_end=3729 + _globals['_RENAME']._serialized_start=3732 + _globals['_RENAME']._serialized_end=3897 + _globals['_RENAME_COLUMNMAPENTRY']._serialized_start=3849 + _globals['_RENAME_COLUMNMAPENTRY']._serialized_end=3897 + _globals['_UNION']._serialized_start=3899 + _globals['_UNION']._serialized_end=3950 + _globals['_DEDUP']._serialized_start=3952 + _globals['_DEDUP']._serialized_end=4018 + _globals['_EXPLODE']._serialized_start=4020 + _globals['_EXPLODE']._serialized_end=4088 + _globals['_FIRST']._serialized_start=4090 + _globals['_FIRST']._serialized_end=4151 + _globals['_LATEST']._serialized_start=4153 + _globals['_LATEST']._serialized_end=4215 + _globals['_CHANGELOG']._serialized_start=4217 + _globals['_CHANGELOG']._serialized_end=4293 + _globals['_WINDOWOPERATORKIND']._serialized_start=4296 + _globals['_WINDOWOPERATORKIND']._serialized_end=4499 + _globals['_DATASETREF']._serialized_start=4501 + _globals['_DATASETREF']._serialized_end=4545 + _globals['_DATAFLOW']._serialized_start=4548 + _globals['_DATAFLOW']._serialized_end=4804 + _globals['_DATAFLOW_PIPELINEDATAFLOW']._serialized_start=4676 + _globals['_DATAFLOW_PIPELINEDATAFLOW']._serialized_end=4796 + _globals['_PIPELINELINEAGES']._serialized_start=4807 + _globals['_PIPELINELINEAGES']._serialized_end=4963 + _globals['_DATASETPIPELINELINEAGES']._serialized_start=4965 + _globals['_DATASETPIPELINELINEAGES']._serialized_end=5057 + _globals['_DATASETLINEAGES']._serialized_start=5060 + _globals['_DATASETLINEAGES']._serialized_end=5199 # @@protoc_insertion_point(module_scope) diff --git a/fennel/gen/dataset_pb2.pyi b/fennel/gen/dataset_pb2.pyi index 4c4da1118..881427cea 100644 --- a/fennel/gen/dataset_pb2.pyi +++ b/fennel/gen/dataset_pb2.pyi @@ -371,6 +371,7 @@ class 
Join(google.protobuf.message.Message): RHS_DSREF_OPERAND_NAME_FIELD_NUMBER: builtins.int HOW_FIELD_NUMBER: builtins.int BROADCAST_FIELD_NUMBER: builtins.int + RHS_FIELDS_FIELD_NUMBER: builtins.int lhs_operand_id: builtins.str rhs_dsref_operand_id: builtins.str """RHS of a JOIN can only be a dataset, here it refers to the DSRef operator""" @@ -388,6 +389,9 @@ class Join(google.protobuf.message.Message): rhs_dsref_operand_name: builtins.str how: global___Join.How.ValueType broadcast: builtins.bool + @property + def rhs_fields(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: + """Only select a subset of fields from RHS, empty means select all""" def __init__( self, *, @@ -400,9 +404,10 @@ class Join(google.protobuf.message.Message): rhs_dsref_operand_name: builtins.str = ..., how: global___Join.How.ValueType = ..., broadcast: builtins.bool = ..., + rhs_fields: collections.abc.Iterable[builtins.str] | None = ..., ) -> None: ... def HasField(self, field_name: typing_extensions.Literal["_within_high", b"_within_high", "_within_low", b"_within_low", "within_high", b"within_high", "within_low", b"within_low"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["_within_high", b"_within_high", "_within_low", b"_within_low", "broadcast", b"broadcast", "how", b"how", "lhs_operand_id", b"lhs_operand_id", "lhs_operand_name", b"lhs_operand_name", "on", b"on", "rhs_dsref_operand_id", b"rhs_dsref_operand_id", "rhs_dsref_operand_name", b"rhs_dsref_operand_name", "within_high", b"within_high", "within_low", b"within_low"]) -> None: ... 
+ def ClearField(self, field_name: typing_extensions.Literal["_within_high", b"_within_high", "_within_low", b"_within_low", "broadcast", b"broadcast", "how", b"how", "lhs_operand_id", b"lhs_operand_id", "lhs_operand_name", b"lhs_operand_name", "on", b"on", "rhs_dsref_operand_id", b"rhs_dsref_operand_id", "rhs_dsref_operand_name", b"rhs_dsref_operand_name", "rhs_fields", b"rhs_fields", "within_high", b"within_high", "within_low", b"within_low"]) -> None: ... @typing.overload def WhichOneof(self, oneof_group: typing_extensions.Literal["_within_high", b"_within_high"]) -> typing_extensions.Literal["within_high"] | None: ... @typing.overload diff --git a/fennel/gen/expr_pb2.py b/fennel/gen/expr_pb2.py index d3391b295..69df78aa2 100644 --- a/fennel/gen/expr_pb2.py +++ b/fennel/gen/expr_pb2.py @@ -14,7 +14,7 @@ import fennel.gen.schema_pb2 as schema__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\nexpr.proto\x12\x11\x66\x65nnel.proto.expr\x1a\x0cschema.proto\"\x9a\x06\n\x04\x45xpr\x12%\n\x03ref\x18\x01 \x01(\x0b\x32\x16.fennel.proto.expr.RefH\x00\x12\x36\n\x0cjson_literal\x18\x02 \x01(\x0b\x32\x1e.fennel.proto.expr.JsonLiteralH\x00\x12)\n\x05unary\x18\x04 \x01(\x0b\x32\x18.fennel.proto.expr.UnaryH\x00\x12\'\n\x04\x63\x61se\x18\x05 \x01(\x0b\x32\x17.fennel.proto.expr.CaseH\x00\x12+\n\x06\x62inary\x18\x06 \x01(\x0b\x32\x19.fennel.proto.expr.BinaryH\x00\x12+\n\x06isnull\x18\x07 \x01(\x0b\x32\x19.fennel.proto.expr.IsNullH\x00\x12/\n\x08\x66illnull\x18\x08 \x01(\x0b\x32\x1b.fennel.proto.expr.FillNullH\x00\x12,\n\x07list_fn\x18\t \x01(\x0b\x32\x19.fennel.proto.expr.ListFnH\x00\x12,\n\x07math_fn\x18\n \x01(\x0b\x32\x19.fennel.proto.expr.MathFnH\x00\x12\x30\n\tstruct_fn\x18\x0b \x01(\x0b\x32\x1b.fennel.proto.expr.StructFnH\x00\x12,\n\x07\x64ict_fn\x18\x0c \x01(\x0b\x32\x19.fennel.proto.expr.DictFnH\x00\x12\x30\n\tstring_fn\x18\r \x01(\x0b\x32\x1b.fennel.proto.expr.StringFnH\x00\x12\x34\n\x0b\x64\x61tetime_fn\x18\x0e 
\x01(\x0b\x32\x1d.fennel.proto.expr.DateTimeFnH\x00\x12>\n\x10\x64\x61tetime_literal\x18\x0f \x01(\x0b\x32\".fennel.proto.expr.DatetimeLiteralH\x00\x12\x34\n\x0bmake_struct\x18\x10 \x01(\x0b\x32\x1d.fennel.proto.expr.MakeStructH\x00\x12\x32\n\nfrom_epoch\x18\x11 \x01(\x0b\x32\x1c.fennel.proto.expr.FromEpochH\x00\x42\x06\n\x04node\"a\n\tFromEpoch\x12)\n\x08\x64uration\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12)\n\x04unit\x18\x02 \x01(\x0e\x32\x1b.fennel.proto.expr.TimeUnit\"\xad\x01\n\x0f\x44\x61tetimeLiteral\x12\x0c\n\x04year\x18\x01 \x01(\r\x12\r\n\x05month\x18\x02 \x01(\r\x12\x0b\n\x03\x64\x61y\x18\x03 \x01(\r\x12\x0c\n\x04hour\x18\x04 \x01(\r\x12\x0e\n\x06minute\x18\x05 \x01(\r\x12\x0e\n\x06second\x18\x06 \x01(\r\x12\x13\n\x0bmicrosecond\x18\x07 \x01(\r\x12-\n\x08timezone\x18\x08 \x01(\x0b\x32\x1b.fennel.proto.expr.Timezone\"\xc5\x01\n\nMakeStruct\x12\x34\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x1f.fennel.proto.schema.StructType\x12\x39\n\x06\x66ields\x18\x02 \x03(\x0b\x32).fennel.proto.expr.MakeStruct.FieldsEntry\x1a\x46\n\x0b\x46ieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12&\n\x05value\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr:\x02\x38\x01\"L\n\x0bJsonLiteral\x12\x0f\n\x07literal\x18\x01 \x01(\t\x12,\n\x05\x64type\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType\"\x13\n\x03Ref\x12\x0c\n\x04name\x18\x01 \x01(\t\"Y\n\x05Unary\x12&\n\x02op\x18\x01 \x01(\x0e\x32\x1a.fennel.proto.expr.UnaryOp\x12(\n\x07operand\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"}\n\x06\x42inary\x12%\n\x04left\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12&\n\x05right\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12$\n\x02op\x18\x03 \x01(\x0e\x32\x18.fennel.proto.expr.BinOp\"b\n\x04\x43\x61se\x12.\n\twhen_then\x18\x01 \x03(\x0b\x32\x1b.fennel.proto.expr.WhenThen\x12*\n\totherwise\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"X\n\x08WhenThen\x12%\n\x04when\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x04then\x18\x02 
\x01(\x0b\x32\x17.fennel.proto.expr.Expr\"2\n\x06IsNull\x12(\n\x07operand\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"[\n\x08\x46illNull\x12(\n\x07operand\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x04\x66ill\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"\xc3\x01\n\x06ListOp\x12%\n\x03len\x18\x01 \x01(\x0b\x32\x16.fennel.proto.expr.LenH\x00\x12&\n\x03get\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.ExprH\x00\x12/\n\x08\x63ontains\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.expr.ContainsH\x00\x12.\n\x08has_null\x18\x04 \x01(\x0b\x32\x1a.fennel.proto.expr.HasNullH\x00\x42\t\n\x07\x66n_type\"\x05\n\x03Len\"\t\n\x07HasNull\"4\n\x08\x43ontains\x12(\n\x07\x65lement\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"V\n\x06ListFn\x12%\n\x04list\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x02\x66n\x18\x02 \x01(\x0b\x32\x19.fennel.proto.expr.ListOp\"\xb9\x01\n\x06MathOp\x12)\n\x05round\x18\x01 \x01(\x0b\x32\x18.fennel.proto.expr.RoundH\x00\x12%\n\x03\x61\x62s\x18\x02 \x01(\x0b\x32\x16.fennel.proto.expr.AbsH\x00\x12\'\n\x04\x63\x65il\x18\x03 \x01(\x0b\x32\x17.fennel.proto.expr.CeilH\x00\x12)\n\x05\x66loor\x18\x04 \x01(\x0b\x32\x18.fennel.proto.expr.FloorH\x00\x42\t\n\x07\x66n_type\"\x1a\n\x05Round\x12\x11\n\tprecision\x18\x01 \x01(\x05\"\x05\n\x03\x41\x62s\"\x06\n\x04\x43\x65il\"\x07\n\x05\x46loor\"Y\n\x06MathFn\x12(\n\x07operand\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x02\x66n\x18\x02 \x01(\x0b\x32\x19.fennel.proto.expr.MathOp\"&\n\x08StructOp\x12\x0f\n\x05\x66ield\x18\x01 \x01(\tH\x00\x42\t\n\x07\x66n_type\"\\\n\x08StructFn\x12\'\n\x06struct\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12\'\n\x02\x66n\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.expr.StructOp\"a\n\x07\x44ictGet\x12&\n\x05\x66ield\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12.\n\rdefault_value\x18\x03 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"\x96\x01\n\x06\x44ictOp\x12%\n\x03len\x18\x01 \x01(\x0b\x32\x16.fennel.proto.expr.LenH\x00\x12)\n\x03get\x18\x02 
\x01(\x0b\x32\x1a.fennel.proto.expr.DictGetH\x00\x12/\n\x08\x63ontains\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.expr.ContainsH\x00\x42\t\n\x07\x66n_type\"V\n\x06\x44ictFn\x12%\n\x04\x64ict\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x02\x66n\x18\x02 \x01(\x0b\x32\x19.fennel.proto.expr.DictOp\"\xc5\x03\n\x08StringOp\x12%\n\x03len\x18\x01 \x01(\x0b\x32\x16.fennel.proto.expr.LenH\x00\x12-\n\x07tolower\x18\x02 \x01(\x0b\x32\x1a.fennel.proto.expr.ToLowerH\x00\x12-\n\x07toupper\x18\x03 \x01(\x0b\x32\x1a.fennel.proto.expr.ToUpperH\x00\x12/\n\x08\x63ontains\x18\x04 \x01(\x0b\x32\x1b.fennel.proto.expr.ContainsH\x00\x12\x33\n\nstartswith\x18\x05 \x01(\x0b\x32\x1d.fennel.proto.expr.StartsWithH\x00\x12/\n\x08\x65ndswith\x18\x06 \x01(\x0b\x32\x1b.fennel.proto.expr.EndsWithH\x00\x12+\n\x06\x63oncat\x18\x07 \x01(\x0b\x32\x19.fennel.proto.expr.ConcatH\x00\x12/\n\x08strptime\x18\x08 \x01(\x0b\x32\x1b.fennel.proto.expr.StrptimeH\x00\x12\x34\n\x0bjson_decode\x18\t \x01(\x0b\x32\x1d.fennel.proto.expr.JsonDecodeH\x00\x42\t\n\x07\x66n_type\"\x1c\n\x08Timezone\x12\x10\n\x08timezone\x18\x01 \x01(\t\":\n\nJsonDecode\x12,\n\x05\x64type\x18\x01 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType\"I\n\x08Strptime\x12\x0e\n\x06\x66ormat\x18\x01 \x01(\t\x12-\n\x08timezone\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.expr.Timezone\"\t\n\x07ToLower\"\t\n\x07ToUpper\"2\n\nStartsWith\x12$\n\x03key\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"0\n\x08\x45ndsWith\x12$\n\x03key\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"0\n\x06\x43oncat\x12&\n\x05other\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"\\\n\x08StringFn\x12\'\n\x06string\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12\'\n\x02\x66n\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.expr.StringOp\"b\n\nDateTimeFn\x12)\n\x08\x64\x61tetime\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12)\n\x02\x66n\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.expr.DateTimeOp\"\xd2\x01\n\nDateTimeOp\x12)\n\x05since\x18\x01 
\x01(\x0b\x32\x18.fennel.proto.expr.SinceH\x00\x12\x34\n\x0bsince_epoch\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.expr.SinceEpochH\x00\x12/\n\x08strftime\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.expr.StrftimeH\x00\x12\'\n\x04part\x18\x04 \x01(\x0b\x32\x17.fennel.proto.expr.PartH\x00\x42\t\n\x07\x66n_type\"Z\n\x05Since\x12&\n\x05other\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12)\n\x04unit\x18\x02 \x01(\x0e\x32\x1b.fennel.proto.expr.TimeUnit\"7\n\nSinceEpoch\x12)\n\x04unit\x18\x01 \x01(\x0e\x32\x1b.fennel.proto.expr.TimeUnit\"\x1a\n\x08Strftime\x12\x0e\n\x06\x66ormat\x18\x01 \x01(\t\"1\n\x04Part\x12)\n\x04unit\x18\x01 \x01(\x0e\x32\x1b.fennel.proto.expr.TimeUnit*\x1b\n\x07UnaryOp\x12\x07\n\x03NEG\x10\x00\x12\x07\n\x03NOT\x10\x01*\x86\x01\n\x05\x42inOp\x12\x07\n\x03\x41\x44\x44\x10\x00\x12\x07\n\x03SUB\x10\x01\x12\x07\n\x03MUL\x10\x02\x12\x07\n\x03\x44IV\x10\x03\x12\x07\n\x03MOD\x10\x04\x12\r\n\tFLOOR_DIV\x10\x05\x12\x06\n\x02\x45Q\x10\x06\x12\x06\n\x02NE\x10\x07\x12\x06\n\x02GT\x10\x08\x12\x07\n\x03GTE\x10\t\x12\x06\n\x02LT\x10\n\x12\x07\n\x03LTE\x10\x0b\x12\x07\n\x03\x41ND\x10\x0c\x12\x06\n\x02OR\x10\r*\x83\x01\n\x08TimeUnit\x12\x0b\n\x07UNKNOWN\x10\x00\x12\n\n\x06SECOND\x10\x01\x12\n\n\x06MINUTE\x10\x02\x12\x08\n\x04HOUR\x10\x03\x12\x07\n\x03\x44\x41Y\x10\x04\x12\x08\n\x04WEEK\x10\x05\x12\t\n\x05MONTH\x10\x06\x12\x08\n\x04YEAR\x10\x07\x12\x0f\n\x0bMICROSECOND\x10\x08\x12\x0f\n\x0bMILLISECOND\x10\tb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\nexpr.proto\x12\x11\x66\x65nnel.proto.expr\x1a\x0cschema.proto\"\xc1\x06\n\x04\x45xpr\x12%\n\x03ref\x18\x01 \x01(\x0b\x32\x16.fennel.proto.expr.RefH\x00\x12\x36\n\x0cjson_literal\x18\x02 \x01(\x0b\x32\x1e.fennel.proto.expr.JsonLiteralH\x00\x12)\n\x05unary\x18\x04 \x01(\x0b\x32\x18.fennel.proto.expr.UnaryH\x00\x12\'\n\x04\x63\x61se\x18\x05 \x01(\x0b\x32\x17.fennel.proto.expr.CaseH\x00\x12+\n\x06\x62inary\x18\x06 \x01(\x0b\x32\x19.fennel.proto.expr.BinaryH\x00\x12+\n\x06isnull\x18\x07 
\x01(\x0b\x32\x19.fennel.proto.expr.IsNullH\x00\x12/\n\x08\x66illnull\x18\x08 \x01(\x0b\x32\x1b.fennel.proto.expr.FillNullH\x00\x12,\n\x07list_fn\x18\t \x01(\x0b\x32\x19.fennel.proto.expr.ListFnH\x00\x12,\n\x07math_fn\x18\n \x01(\x0b\x32\x19.fennel.proto.expr.MathFnH\x00\x12\x30\n\tstruct_fn\x18\x0b \x01(\x0b\x32\x1b.fennel.proto.expr.StructFnH\x00\x12,\n\x07\x64ict_fn\x18\x0c \x01(\x0b\x32\x19.fennel.proto.expr.DictFnH\x00\x12\x30\n\tstring_fn\x18\r \x01(\x0b\x32\x1b.fennel.proto.expr.StringFnH\x00\x12\x34\n\x0b\x64\x61tetime_fn\x18\x0e \x01(\x0b\x32\x1d.fennel.proto.expr.DateTimeFnH\x00\x12>\n\x10\x64\x61tetime_literal\x18\x0f \x01(\x0b\x32\".fennel.proto.expr.DatetimeLiteralH\x00\x12\x34\n\x0bmake_struct\x18\x10 \x01(\x0b\x32\x1d.fennel.proto.expr.MakeStructH\x00\x12\x32\n\nfrom_epoch\x18\x11 \x01(\x0b\x32\x1c.fennel.proto.expr.FromEpochH\x00\x12%\n\x03var\x18\x12 \x01(\x0b\x32\x16.fennel.proto.expr.VarH\x00\x42\x06\n\x04node\"\x13\n\x03Var\x12\x0c\n\x04name\x18\x01 \x01(\t\"a\n\tFromEpoch\x12)\n\x08\x64uration\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12)\n\x04unit\x18\x02 \x01(\x0e\x32\x1b.fennel.proto.expr.TimeUnit\"\xad\x01\n\x0f\x44\x61tetimeLiteral\x12\x0c\n\x04year\x18\x01 \x01(\r\x12\r\n\x05month\x18\x02 \x01(\r\x12\x0b\n\x03\x64\x61y\x18\x03 \x01(\r\x12\x0c\n\x04hour\x18\x04 \x01(\r\x12\x0e\n\x06minute\x18\x05 \x01(\r\x12\x0e\n\x06second\x18\x06 \x01(\r\x12\x13\n\x0bmicrosecond\x18\x07 \x01(\r\x12-\n\x08timezone\x18\x08 \x01(\x0b\x32\x1b.fennel.proto.expr.Timezone\"\xc5\x01\n\nMakeStruct\x12\x34\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x1f.fennel.proto.schema.StructType\x12\x39\n\x06\x66ields\x18\x02 \x03(\x0b\x32).fennel.proto.expr.MakeStruct.FieldsEntry\x1a\x46\n\x0b\x46ieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12&\n\x05value\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr:\x02\x38\x01\"L\n\x0bJsonLiteral\x12\x0f\n\x07literal\x18\x01 \x01(\t\x12,\n\x05\x64type\x18\x02 
\x01(\x0b\x32\x1d.fennel.proto.schema.DataType\"\x13\n\x03Ref\x12\x0c\n\x04name\x18\x01 \x01(\t\"Y\n\x05Unary\x12&\n\x02op\x18\x01 \x01(\x0e\x32\x1a.fennel.proto.expr.UnaryOp\x12(\n\x07operand\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"}\n\x06\x42inary\x12%\n\x04left\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12&\n\x05right\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12$\n\x02op\x18\x03 \x01(\x0e\x32\x18.fennel.proto.expr.BinOp\"b\n\x04\x43\x61se\x12.\n\twhen_then\x18\x01 \x03(\x0b\x32\x1b.fennel.proto.expr.WhenThen\x12*\n\totherwise\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"X\n\x08WhenThen\x12%\n\x04when\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x04then\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"2\n\x06IsNull\x12(\n\x07operand\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"[\n\x08\x46illNull\x12(\n\x07operand\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x04\x66ill\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"\xa3\x04\n\x06ListOp\x12%\n\x03len\x18\x01 \x01(\x0b\x32\x16.fennel.proto.expr.LenH\x00\x12&\n\x03get\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.ExprH\x00\x12/\n\x08\x63ontains\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.expr.ContainsH\x00\x12.\n\x08has_null\x18\x04 \x01(\x0b\x32\x1a.fennel.proto.expr.HasNullH\x00\x12)\n\x03sum\x18\x05 \x01(\x0b\x32\x1a.fennel.proto.expr.ListSumH\x00\x12)\n\x03min\x18\x06 \x01(\x0b\x32\x1a.fennel.proto.expr.ListMinH\x00\x12)\n\x03max\x18\x07 \x01(\x0b\x32\x1a.fennel.proto.expr.ListMaxH\x00\x12)\n\x03\x61ll\x18\x08 \x01(\x0b\x32\x1a.fennel.proto.expr.ListAllH\x00\x12)\n\x03\x61ny\x18\t \x01(\x0b\x32\x1a.fennel.proto.expr.ListAnyH\x00\x12+\n\x04mean\x18\n \x01(\x0b\x32\x1b.fennel.proto.expr.ListMeanH\x00\x12/\n\x06\x66ilter\x18\x0b \x01(\x0b\x32\x1d.fennel.proto.expr.ListFilterH\x00\x12)\n\x03map\x18\x0c \x01(\x0b\x32\x1a.fennel.proto.expr.ListMapH\x00\x42\t\n\x07\x66n_type\"E\n\nListFilter\x12\x0b\n\x03var\x18\x01 \x01(\t\x12*\n\tpredicate\x18\x02 
\x01(\x0b\x32\x17.fennel.proto.expr.Expr\"A\n\x07ListMap\x12\x0b\n\x03var\x18\x01 \x01(\t\x12)\n\x08map_expr\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"\t\n\x07ListSum\"\t\n\x07ListMin\"\n\n\x08ListMean\"\t\n\x07ListMax\"\t\n\x07ListAll\"\t\n\x07ListAny\"\x05\n\x03Len\"\t\n\x07HasNull\"4\n\x08\x43ontains\x12(\n\x07\x65lement\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"V\n\x06ListFn\x12%\n\x04list\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x02\x66n\x18\x02 \x01(\x0b\x32\x19.fennel.proto.expr.ListOp\"\xb9\x01\n\x06MathOp\x12)\n\x05round\x18\x01 \x01(\x0b\x32\x18.fennel.proto.expr.RoundH\x00\x12%\n\x03\x61\x62s\x18\x02 \x01(\x0b\x32\x16.fennel.proto.expr.AbsH\x00\x12\'\n\x04\x63\x65il\x18\x03 \x01(\x0b\x32\x17.fennel.proto.expr.CeilH\x00\x12)\n\x05\x66loor\x18\x04 \x01(\x0b\x32\x18.fennel.proto.expr.FloorH\x00\x42\t\n\x07\x66n_type\"\x1a\n\x05Round\x12\x11\n\tprecision\x18\x01 \x01(\x05\"\x05\n\x03\x41\x62s\"\x06\n\x04\x43\x65il\"\x07\n\x05\x46loor\"Y\n\x06MathFn\x12(\n\x07operand\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x02\x66n\x18\x02 \x01(\x0b\x32\x19.fennel.proto.expr.MathOp\"&\n\x08StructOp\x12\x0f\n\x05\x66ield\x18\x01 \x01(\tH\x00\x42\t\n\x07\x66n_type\"\\\n\x08StructFn\x12\'\n\x06struct\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12\'\n\x02\x66n\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.expr.StructOp\"a\n\x07\x44ictGet\x12&\n\x05\x66ield\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12.\n\rdefault_value\x18\x03 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"\x96\x01\n\x06\x44ictOp\x12%\n\x03len\x18\x01 \x01(\x0b\x32\x16.fennel.proto.expr.LenH\x00\x12)\n\x03get\x18\x02 \x01(\x0b\x32\x1a.fennel.proto.expr.DictGetH\x00\x12/\n\x08\x63ontains\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.expr.ContainsH\x00\x42\t\n\x07\x66n_type\"V\n\x06\x44ictFn\x12%\n\x04\x64ict\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x02\x66n\x18\x02 \x01(\x0b\x32\x19.fennel.proto.expr.DictOp\"\xc5\x03\n\x08StringOp\x12%\n\x03len\x18\x01 
\x01(\x0b\x32\x16.fennel.proto.expr.LenH\x00\x12-\n\x07tolower\x18\x02 \x01(\x0b\x32\x1a.fennel.proto.expr.ToLowerH\x00\x12-\n\x07toupper\x18\x03 \x01(\x0b\x32\x1a.fennel.proto.expr.ToUpperH\x00\x12/\n\x08\x63ontains\x18\x04 \x01(\x0b\x32\x1b.fennel.proto.expr.ContainsH\x00\x12\x33\n\nstartswith\x18\x05 \x01(\x0b\x32\x1d.fennel.proto.expr.StartsWithH\x00\x12/\n\x08\x65ndswith\x18\x06 \x01(\x0b\x32\x1b.fennel.proto.expr.EndsWithH\x00\x12+\n\x06\x63oncat\x18\x07 \x01(\x0b\x32\x19.fennel.proto.expr.ConcatH\x00\x12/\n\x08strptime\x18\x08 \x01(\x0b\x32\x1b.fennel.proto.expr.StrptimeH\x00\x12\x34\n\x0bjson_decode\x18\t \x01(\x0b\x32\x1d.fennel.proto.expr.JsonDecodeH\x00\x42\t\n\x07\x66n_type\"\x1c\n\x08Timezone\x12\x10\n\x08timezone\x18\x01 \x01(\t\":\n\nJsonDecode\x12,\n\x05\x64type\x18\x01 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType\"I\n\x08Strptime\x12\x0e\n\x06\x66ormat\x18\x01 \x01(\t\x12-\n\x08timezone\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.expr.Timezone\"\t\n\x07ToLower\"\t\n\x07ToUpper\"2\n\nStartsWith\x12$\n\x03key\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"0\n\x08\x45ndsWith\x12$\n\x03key\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"0\n\x06\x43oncat\x12&\n\x05other\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"\\\n\x08StringFn\x12\'\n\x06string\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12\'\n\x02\x66n\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.expr.StringOp\"b\n\nDateTimeFn\x12)\n\x08\x64\x61tetime\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12)\n\x02\x66n\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.expr.DateTimeOp\"\xd2\x01\n\nDateTimeOp\x12)\n\x05since\x18\x01 \x01(\x0b\x32\x18.fennel.proto.expr.SinceH\x00\x12\x34\n\x0bsince_epoch\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.expr.SinceEpochH\x00\x12/\n\x08strftime\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.expr.StrftimeH\x00\x12\'\n\x04part\x18\x04 \x01(\x0b\x32\x17.fennel.proto.expr.PartH\x00\x42\t\n\x07\x66n_type\"Z\n\x05Since\x12&\n\x05other\x18\x01 
\x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12)\n\x04unit\x18\x02 \x01(\x0e\x32\x1b.fennel.proto.expr.TimeUnit\"7\n\nSinceEpoch\x12)\n\x04unit\x18\x01 \x01(\x0e\x32\x1b.fennel.proto.expr.TimeUnit\"I\n\x08Strftime\x12\x0e\n\x06\x66ormat\x18\x01 \x01(\t\x12-\n\x08timezone\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.expr.Timezone\"`\n\x04Part\x12)\n\x04unit\x18\x01 \x01(\x0e\x32\x1b.fennel.proto.expr.TimeUnit\x12-\n\x08timezone\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.expr.Timezone*\x1b\n\x07UnaryOp\x12\x07\n\x03NEG\x10\x00\x12\x07\n\x03NOT\x10\x01*\x86\x01\n\x05\x42inOp\x12\x07\n\x03\x41\x44\x44\x10\x00\x12\x07\n\x03SUB\x10\x01\x12\x07\n\x03MUL\x10\x02\x12\x07\n\x03\x44IV\x10\x03\x12\x07\n\x03MOD\x10\x04\x12\r\n\tFLOOR_DIV\x10\x05\x12\x06\n\x02\x45Q\x10\x06\x12\x06\n\x02NE\x10\x07\x12\x06\n\x02GT\x10\x08\x12\x07\n\x03GTE\x10\t\x12\x06\n\x02LT\x10\n\x12\x07\n\x03LTE\x10\x0b\x12\x07\n\x03\x41ND\x10\x0c\x12\x06\n\x02OR\x10\r*\x83\x01\n\x08TimeUnit\x12\x0b\n\x07UNKNOWN\x10\x00\x12\n\n\x06SECOND\x10\x01\x12\n\n\x06MINUTE\x10\x02\x12\x08\n\x04HOUR\x10\x03\x12\x07\n\x03\x44\x41Y\x10\x04\x12\x08\n\x04WEEK\x10\x05\x12\t\n\x05MONTH\x10\x06\x12\x08\n\x04YEAR\x10\x07\x12\x0f\n\x0bMICROSECOND\x10\x08\x12\x0f\n\x0bMILLISECOND\x10\tb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -23,100 +23,118 @@ DESCRIPTOR._options = None _globals['_MAKESTRUCT_FIELDSENTRY']._options = None _globals['_MAKESTRUCT_FIELDSENTRY']._serialized_options = b'8\001' - _globals['_UNARYOP']._serialized_start=4564 - _globals['_UNARYOP']._serialized_end=4591 - _globals['_BINOP']._serialized_start=4594 - _globals['_BINOP']._serialized_end=4728 - _globals['_TIMEUNIT']._serialized_start=4731 - _globals['_TIMEUNIT']._serialized_end=4862 + _globals['_UNARYOP']._serialized_start=5275 + _globals['_UNARYOP']._serialized_end=5302 + _globals['_BINOP']._serialized_start=5305 + _globals['_BINOP']._serialized_end=5439 + _globals['_TIMEUNIT']._serialized_start=5442 + 
_globals['_TIMEUNIT']._serialized_end=5573 _globals['_EXPR']._serialized_start=48 - _globals['_EXPR']._serialized_end=842 - _globals['_FROMEPOCH']._serialized_start=844 - _globals['_FROMEPOCH']._serialized_end=941 - _globals['_DATETIMELITERAL']._serialized_start=944 - _globals['_DATETIMELITERAL']._serialized_end=1117 - _globals['_MAKESTRUCT']._serialized_start=1120 - _globals['_MAKESTRUCT']._serialized_end=1317 - _globals['_MAKESTRUCT_FIELDSENTRY']._serialized_start=1247 - _globals['_MAKESTRUCT_FIELDSENTRY']._serialized_end=1317 - _globals['_JSONLITERAL']._serialized_start=1319 - _globals['_JSONLITERAL']._serialized_end=1395 - _globals['_REF']._serialized_start=1397 - _globals['_REF']._serialized_end=1416 - _globals['_UNARY']._serialized_start=1418 - _globals['_UNARY']._serialized_end=1507 - _globals['_BINARY']._serialized_start=1509 - _globals['_BINARY']._serialized_end=1634 - _globals['_CASE']._serialized_start=1636 - _globals['_CASE']._serialized_end=1734 - _globals['_WHENTHEN']._serialized_start=1736 - _globals['_WHENTHEN']._serialized_end=1824 - _globals['_ISNULL']._serialized_start=1826 - _globals['_ISNULL']._serialized_end=1876 - _globals['_FILLNULL']._serialized_start=1878 - _globals['_FILLNULL']._serialized_end=1969 - _globals['_LISTOP']._serialized_start=1972 - _globals['_LISTOP']._serialized_end=2167 - _globals['_LEN']._serialized_start=2169 - _globals['_LEN']._serialized_end=2174 - _globals['_HASNULL']._serialized_start=2176 - _globals['_HASNULL']._serialized_end=2185 - _globals['_CONTAINS']._serialized_start=2187 - _globals['_CONTAINS']._serialized_end=2239 - _globals['_LISTFN']._serialized_start=2241 - _globals['_LISTFN']._serialized_end=2327 - _globals['_MATHOP']._serialized_start=2330 - _globals['_MATHOP']._serialized_end=2515 - _globals['_ROUND']._serialized_start=2517 - _globals['_ROUND']._serialized_end=2543 - _globals['_ABS']._serialized_start=2545 - _globals['_ABS']._serialized_end=2550 - _globals['_CEIL']._serialized_start=2552 - 
_globals['_CEIL']._serialized_end=2558 - _globals['_FLOOR']._serialized_start=2560 - _globals['_FLOOR']._serialized_end=2567 - _globals['_MATHFN']._serialized_start=2569 - _globals['_MATHFN']._serialized_end=2658 - _globals['_STRUCTOP']._serialized_start=2660 - _globals['_STRUCTOP']._serialized_end=2698 - _globals['_STRUCTFN']._serialized_start=2700 - _globals['_STRUCTFN']._serialized_end=2792 - _globals['_DICTGET']._serialized_start=2794 - _globals['_DICTGET']._serialized_end=2891 - _globals['_DICTOP']._serialized_start=2894 - _globals['_DICTOP']._serialized_end=3044 - _globals['_DICTFN']._serialized_start=3046 - _globals['_DICTFN']._serialized_end=3132 - _globals['_STRINGOP']._serialized_start=3135 - _globals['_STRINGOP']._serialized_end=3588 - _globals['_TIMEZONE']._serialized_start=3590 - _globals['_TIMEZONE']._serialized_end=3618 - _globals['_JSONDECODE']._serialized_start=3620 - _globals['_JSONDECODE']._serialized_end=3678 - _globals['_STRPTIME']._serialized_start=3680 - _globals['_STRPTIME']._serialized_end=3753 - _globals['_TOLOWER']._serialized_start=3755 - _globals['_TOLOWER']._serialized_end=3764 - _globals['_TOUPPER']._serialized_start=3766 - _globals['_TOUPPER']._serialized_end=3775 - _globals['_STARTSWITH']._serialized_start=3777 - _globals['_STARTSWITH']._serialized_end=3827 - _globals['_ENDSWITH']._serialized_start=3829 - _globals['_ENDSWITH']._serialized_end=3877 - _globals['_CONCAT']._serialized_start=3879 - _globals['_CONCAT']._serialized_end=3927 - _globals['_STRINGFN']._serialized_start=3929 - _globals['_STRINGFN']._serialized_end=4021 - _globals['_DATETIMEFN']._serialized_start=4023 - _globals['_DATETIMEFN']._serialized_end=4121 - _globals['_DATETIMEOP']._serialized_start=4124 - _globals['_DATETIMEOP']._serialized_end=4334 - _globals['_SINCE']._serialized_start=4336 - _globals['_SINCE']._serialized_end=4426 - _globals['_SINCEEPOCH']._serialized_start=4428 - _globals['_SINCEEPOCH']._serialized_end=4483 - 
_globals['_STRFTIME']._serialized_start=4485 - _globals['_STRFTIME']._serialized_end=4511 - _globals['_PART']._serialized_start=4513 - _globals['_PART']._serialized_end=4562 + _globals['_EXPR']._serialized_end=881 + _globals['_VAR']._serialized_start=883 + _globals['_VAR']._serialized_end=902 + _globals['_FROMEPOCH']._serialized_start=904 + _globals['_FROMEPOCH']._serialized_end=1001 + _globals['_DATETIMELITERAL']._serialized_start=1004 + _globals['_DATETIMELITERAL']._serialized_end=1177 + _globals['_MAKESTRUCT']._serialized_start=1180 + _globals['_MAKESTRUCT']._serialized_end=1377 + _globals['_MAKESTRUCT_FIELDSENTRY']._serialized_start=1307 + _globals['_MAKESTRUCT_FIELDSENTRY']._serialized_end=1377 + _globals['_JSONLITERAL']._serialized_start=1379 + _globals['_JSONLITERAL']._serialized_end=1455 + _globals['_REF']._serialized_start=1457 + _globals['_REF']._serialized_end=1476 + _globals['_UNARY']._serialized_start=1478 + _globals['_UNARY']._serialized_end=1567 + _globals['_BINARY']._serialized_start=1569 + _globals['_BINARY']._serialized_end=1694 + _globals['_CASE']._serialized_start=1696 + _globals['_CASE']._serialized_end=1794 + _globals['_WHENTHEN']._serialized_start=1796 + _globals['_WHENTHEN']._serialized_end=1884 + _globals['_ISNULL']._serialized_start=1886 + _globals['_ISNULL']._serialized_end=1936 + _globals['_FILLNULL']._serialized_start=1938 + _globals['_FILLNULL']._serialized_end=2029 + _globals['_LISTOP']._serialized_start=2032 + _globals['_LISTOP']._serialized_end=2579 + _globals['_LISTFILTER']._serialized_start=2581 + _globals['_LISTFILTER']._serialized_end=2650 + _globals['_LISTMAP']._serialized_start=2652 + _globals['_LISTMAP']._serialized_end=2717 + _globals['_LISTSUM']._serialized_start=2719 + _globals['_LISTSUM']._serialized_end=2728 + _globals['_LISTMIN']._serialized_start=2730 + _globals['_LISTMIN']._serialized_end=2739 + _globals['_LISTMEAN']._serialized_start=2741 + _globals['_LISTMEAN']._serialized_end=2751 + 
_globals['_LISTMAX']._serialized_start=2753 + _globals['_LISTMAX']._serialized_end=2762 + _globals['_LISTALL']._serialized_start=2764 + _globals['_LISTALL']._serialized_end=2773 + _globals['_LISTANY']._serialized_start=2775 + _globals['_LISTANY']._serialized_end=2784 + _globals['_LEN']._serialized_start=2786 + _globals['_LEN']._serialized_end=2791 + _globals['_HASNULL']._serialized_start=2793 + _globals['_HASNULL']._serialized_end=2802 + _globals['_CONTAINS']._serialized_start=2804 + _globals['_CONTAINS']._serialized_end=2856 + _globals['_LISTFN']._serialized_start=2858 + _globals['_LISTFN']._serialized_end=2944 + _globals['_MATHOP']._serialized_start=2947 + _globals['_MATHOP']._serialized_end=3132 + _globals['_ROUND']._serialized_start=3134 + _globals['_ROUND']._serialized_end=3160 + _globals['_ABS']._serialized_start=3162 + _globals['_ABS']._serialized_end=3167 + _globals['_CEIL']._serialized_start=3169 + _globals['_CEIL']._serialized_end=3175 + _globals['_FLOOR']._serialized_start=3177 + _globals['_FLOOR']._serialized_end=3184 + _globals['_MATHFN']._serialized_start=3186 + _globals['_MATHFN']._serialized_end=3275 + _globals['_STRUCTOP']._serialized_start=3277 + _globals['_STRUCTOP']._serialized_end=3315 + _globals['_STRUCTFN']._serialized_start=3317 + _globals['_STRUCTFN']._serialized_end=3409 + _globals['_DICTGET']._serialized_start=3411 + _globals['_DICTGET']._serialized_end=3508 + _globals['_DICTOP']._serialized_start=3511 + _globals['_DICTOP']._serialized_end=3661 + _globals['_DICTFN']._serialized_start=3663 + _globals['_DICTFN']._serialized_end=3749 + _globals['_STRINGOP']._serialized_start=3752 + _globals['_STRINGOP']._serialized_end=4205 + _globals['_TIMEZONE']._serialized_start=4207 + _globals['_TIMEZONE']._serialized_end=4235 + _globals['_JSONDECODE']._serialized_start=4237 + _globals['_JSONDECODE']._serialized_end=4295 + _globals['_STRPTIME']._serialized_start=4297 + _globals['_STRPTIME']._serialized_end=4370 + 
_globals['_TOLOWER']._serialized_start=4372 + _globals['_TOLOWER']._serialized_end=4381 + _globals['_TOUPPER']._serialized_start=4383 + _globals['_TOUPPER']._serialized_end=4392 + _globals['_STARTSWITH']._serialized_start=4394 + _globals['_STARTSWITH']._serialized_end=4444 + _globals['_ENDSWITH']._serialized_start=4446 + _globals['_ENDSWITH']._serialized_end=4494 + _globals['_CONCAT']._serialized_start=4496 + _globals['_CONCAT']._serialized_end=4544 + _globals['_STRINGFN']._serialized_start=4546 + _globals['_STRINGFN']._serialized_end=4638 + _globals['_DATETIMEFN']._serialized_start=4640 + _globals['_DATETIMEFN']._serialized_end=4738 + _globals['_DATETIMEOP']._serialized_start=4741 + _globals['_DATETIMEOP']._serialized_end=4951 + _globals['_SINCE']._serialized_start=4953 + _globals['_SINCE']._serialized_end=5043 + _globals['_SINCEEPOCH']._serialized_start=5045 + _globals['_SINCEEPOCH']._serialized_end=5100 + _globals['_STRFTIME']._serialized_start=5102 + _globals['_STRFTIME']._serialized_end=5175 + _globals['_PART']._serialized_start=5177 + _globals['_PART']._serialized_end=5273 # @@protoc_insertion_point(module_scope) diff --git a/fennel/gen/expr_pb2.pyi b/fennel/gen/expr_pb2.pyi index b423c09cc..9c2911dd6 100644 --- a/fennel/gen/expr_pb2.pyi +++ b/fennel/gen/expr_pb2.pyi @@ -130,6 +130,7 @@ class Expr(google.protobuf.message.Message): DATETIME_LITERAL_FIELD_NUMBER: builtins.int MAKE_STRUCT_FIELD_NUMBER: builtins.int FROM_EPOCH_FIELD_NUMBER: builtins.int + VAR_FIELD_NUMBER: builtins.int @property def ref(self) -> global___Ref: ... @property @@ -163,6 +164,8 @@ class Expr(google.protobuf.message.Message): def make_struct(self) -> global___MakeStruct: ... @property def from_epoch(self) -> global___FromEpoch: ... + @property + def var(self) -> global___Var: ... 
def __init__( self, *, @@ -182,13 +185,29 @@ class Expr(google.protobuf.message.Message): datetime_literal: global___DatetimeLiteral | None = ..., make_struct: global___MakeStruct | None = ..., from_epoch: global___FromEpoch | None = ..., + var: global___Var | None = ..., ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["binary", b"binary", "case", b"case", "datetime_fn", b"datetime_fn", "datetime_literal", b"datetime_literal", "dict_fn", b"dict_fn", "fillnull", b"fillnull", "from_epoch", b"from_epoch", "isnull", b"isnull", "json_literal", b"json_literal", "list_fn", b"list_fn", "make_struct", b"make_struct", "math_fn", b"math_fn", "node", b"node", "ref", b"ref", "string_fn", b"string_fn", "struct_fn", b"struct_fn", "unary", b"unary"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["binary", b"binary", "case", b"case", "datetime_fn", b"datetime_fn", "datetime_literal", b"datetime_literal", "dict_fn", b"dict_fn", "fillnull", b"fillnull", "from_epoch", b"from_epoch", "isnull", b"isnull", "json_literal", b"json_literal", "list_fn", b"list_fn", "make_struct", b"make_struct", "math_fn", b"math_fn", "node", b"node", "ref", b"ref", "string_fn", b"string_fn", "struct_fn", b"struct_fn", "unary", b"unary"]) -> None: ... - def WhichOneof(self, oneof_group: typing_extensions.Literal["node", b"node"]) -> typing_extensions.Literal["ref", "json_literal", "unary", "case", "binary", "isnull", "fillnull", "list_fn", "math_fn", "struct_fn", "dict_fn", "string_fn", "datetime_fn", "datetime_literal", "make_struct", "from_epoch"] | None: ... 
+ def HasField(self, field_name: typing_extensions.Literal["binary", b"binary", "case", b"case", "datetime_fn", b"datetime_fn", "datetime_literal", b"datetime_literal", "dict_fn", b"dict_fn", "fillnull", b"fillnull", "from_epoch", b"from_epoch", "isnull", b"isnull", "json_literal", b"json_literal", "list_fn", b"list_fn", "make_struct", b"make_struct", "math_fn", b"math_fn", "node", b"node", "ref", b"ref", "string_fn", b"string_fn", "struct_fn", b"struct_fn", "unary", b"unary", "var", b"var"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["binary", b"binary", "case", b"case", "datetime_fn", b"datetime_fn", "datetime_literal", b"datetime_literal", "dict_fn", b"dict_fn", "fillnull", b"fillnull", "from_epoch", b"from_epoch", "isnull", b"isnull", "json_literal", b"json_literal", "list_fn", b"list_fn", "make_struct", b"make_struct", "math_fn", b"math_fn", "node", b"node", "ref", b"ref", "string_fn", b"string_fn", "struct_fn", b"struct_fn", "unary", b"unary", "var", b"var"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["node", b"node"]) -> typing_extensions.Literal["ref", "json_literal", "unary", "case", "binary", "isnull", "fillnull", "list_fn", "math_fn", "struct_fn", "dict_fn", "string_fn", "datetime_fn", "datetime_literal", "make_struct", "from_epoch", "var"] | None: ... global___Expr = Expr +@typing_extensions.final +class Var(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + NAME_FIELD_NUMBER: builtins.int + name: builtins.str + def __init__( + self, + *, + name: builtins.str = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["name", b"name"]) -> None: ... 
+ +global___Var = Var + @typing_extensions.final class FromEpoch(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -454,6 +473,14 @@ class ListOp(google.protobuf.message.Message): GET_FIELD_NUMBER: builtins.int CONTAINS_FIELD_NUMBER: builtins.int HAS_NULL_FIELD_NUMBER: builtins.int + SUM_FIELD_NUMBER: builtins.int + MIN_FIELD_NUMBER: builtins.int + MAX_FIELD_NUMBER: builtins.int + ALL_FIELD_NUMBER: builtins.int + ANY_FIELD_NUMBER: builtins.int + MEAN_FIELD_NUMBER: builtins.int + FILTER_FIELD_NUMBER: builtins.int + MAP_FIELD_NUMBER: builtins.int @property def len(self) -> global___Len: ... @property @@ -464,6 +491,22 @@ class ListOp(google.protobuf.message.Message): """Check if the list contains an element""" @property def has_null(self) -> global___HasNull: ... + @property + def sum(self) -> global___ListSum: ... + @property + def min(self) -> global___ListMin: ... + @property + def max(self) -> global___ListMax: ... + @property + def all(self) -> global___ListAll: ... + @property + def any(self) -> global___ListAny: ... + @property + def mean(self) -> global___ListMean: ... + @property + def filter(self) -> global___ListFilter: ... + @property + def map(self) -> global___ListMap: ... def __init__( self, *, @@ -471,13 +514,121 @@ class ListOp(google.protobuf.message.Message): get: global___Expr | None = ..., contains: global___Contains | None = ..., has_null: global___HasNull | None = ..., + sum: global___ListSum | None = ..., + min: global___ListMin | None = ..., + max: global___ListMax | None = ..., + all: global___ListAll | None = ..., + any: global___ListAny | None = ..., + mean: global___ListMean | None = ..., + filter: global___ListFilter | None = ..., + map: global___ListMap | None = ..., ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["contains", b"contains", "fn_type", b"fn_type", "get", b"get", "has_null", b"has_null", "len", b"len"]) -> builtins.bool: ... 
- def ClearField(self, field_name: typing_extensions.Literal["contains", b"contains", "fn_type", b"fn_type", "get", b"get", "has_null", b"has_null", "len", b"len"]) -> None: ... - def WhichOneof(self, oneof_group: typing_extensions.Literal["fn_type", b"fn_type"]) -> typing_extensions.Literal["len", "get", "contains", "has_null"] | None: ... + def HasField(self, field_name: typing_extensions.Literal["all", b"all", "any", b"any", "contains", b"contains", "filter", b"filter", "fn_type", b"fn_type", "get", b"get", "has_null", b"has_null", "len", b"len", "map", b"map", "max", b"max", "mean", b"mean", "min", b"min", "sum", b"sum"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["all", b"all", "any", b"any", "contains", b"contains", "filter", b"filter", "fn_type", b"fn_type", "get", b"get", "has_null", b"has_null", "len", b"len", "map", b"map", "max", b"max", "mean", b"mean", "min", b"min", "sum", b"sum"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["fn_type", b"fn_type"]) -> typing_extensions.Literal["len", "get", "contains", "has_null", "sum", "min", "max", "all", "any", "mean", "filter", "map"] | None: ... global___ListOp = ListOp +@typing_extensions.final +class ListFilter(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + VAR_FIELD_NUMBER: builtins.int + PREDICATE_FIELD_NUMBER: builtins.int + var: builtins.str + @property + def predicate(self) -> global___Expr: ... + def __init__( + self, + *, + var: builtins.str = ..., + predicate: global___Expr | None = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["predicate", b"predicate"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["predicate", b"predicate", "var", b"var"]) -> None: ... 
+ +global___ListFilter = ListFilter + +@typing_extensions.final +class ListMap(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + VAR_FIELD_NUMBER: builtins.int + MAP_EXPR_FIELD_NUMBER: builtins.int + var: builtins.str + @property + def map_expr(self) -> global___Expr: ... + def __init__( + self, + *, + var: builtins.str = ..., + map_expr: global___Expr | None = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["map_expr", b"map_expr"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["map_expr", b"map_expr", "var", b"var"]) -> None: ... + +global___ListMap = ListMap + +@typing_extensions.final +class ListSum(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + def __init__( + self, + ) -> None: ... + +global___ListSum = ListSum + +@typing_extensions.final +class ListMin(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + def __init__( + self, + ) -> None: ... + +global___ListMin = ListMin + +@typing_extensions.final +class ListMean(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + def __init__( + self, + ) -> None: ... + +global___ListMean = ListMean + +@typing_extensions.final +class ListMax(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + def __init__( + self, + ) -> None: ... + +global___ListMax = ListMax + +@typing_extensions.final +class ListAll(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + def __init__( + self, + ) -> None: ... + +global___ListAll = ListAll + +@typing_extensions.final +class ListAny(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + def __init__( + self, + ) -> None: ... 
+ +global___ListAny = ListAny + @typing_extensions.final class Len(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -1023,13 +1174,18 @@ class Strftime(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor FORMAT_FIELD_NUMBER: builtins.int + TIMEZONE_FIELD_NUMBER: builtins.int format: builtins.str + @property + def timezone(self) -> global___Timezone: ... def __init__( self, *, format: builtins.str = ..., + timezone: global___Timezone | None = ..., ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["format", b"format"]) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["timezone", b"timezone"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["format", b"format", "timezone", b"timezone"]) -> None: ... global___Strftime = Strftime @@ -1038,12 +1194,17 @@ class Part(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor UNIT_FIELD_NUMBER: builtins.int + TIMEZONE_FIELD_NUMBER: builtins.int unit: global___TimeUnit.ValueType + @property + def timezone(self) -> global___Timezone: ... def __init__( self, *, unit: global___TimeUnit.ValueType = ..., + timezone: global___Timezone | None = ..., ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["unit", b"unit"]) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["timezone", b"timezone"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["timezone", b"timezone", "unit", b"unit"]) -> None: ... 
global___Part = Part diff --git a/pyproject.toml b/pyproject.toml index b40baedc8..c1fe29608 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.9" -pandas = {extras = ["performance"], version = "^2.2.2"} +pandas = { extras = ["performance"], version = "^2.2.2" } protobuf = "^4.22.4" frozendict = "^2.3.8" numpy = [ @@ -20,7 +20,7 @@ pytest = "7.1.3" pytest-rerunfailures = "^13.0" sortedcontainers = "^2.4.0" typing-extensions = "^4.12.0" -fennel-data-lib = "0.1.18" +fennel-data-lib = "0.1.20" pyarrow = "^14.0.2" [tool.poetry.dev-dependencies]