diff --git a/docs/api.yml b/docs/api.yml
index f1e3c7a17..b8139cf9f 100644
--- a/docs/api.yml
+++ b/docs/api.yml
@@ -88,36 +88,44 @@ sidebar:
pages:
- "api-reference/expressions/binary"
- "api-reference/expressions/col"
+ - "api-reference/expressions/datetime"
- "api-reference/expressions/eval"
+ - "api-reference/expressions/from_epoch"
- "api-reference/expressions/isnull"
- "api-reference/expressions/fillnull"
- "api-reference/expressions/lit"
- "api-reference/expressions/not"
- "api-reference/expressions/typeof"
- "api-reference/expressions/when"
- # - "api-reference/expressions/datetime"
- # - "api-reference/expressions/from_epoch"
- # - slug: "api-reference/expressions/dt"
- # title: "Datetime Expressions"
- # pages:
- # - "api-reference/expressions/dt.since"
- # - "api-reference/expressions/dt.since_epoch"
- # - "api-reference/expressions/dt.year"
- # - "api-reference/expressions/dt.month"
- # - "api-reference/expressions/dt.day"
- # - "api-reference/expressions/dt.hour"
- # - "api-reference/expressions/dt.minute"
- # - "api-reference/expressions/dt.second"
- # - "api-reference/expressions/dt.strftime"
+ - slug: "api-reference/expressions/dt"
+ title: "Datetime Expressions"
+ pages:
+ - "api-reference/expressions/dt/day"
+ - "api-reference/expressions/dt/hour"
+ - "api-reference/expressions/dt/minute"
+ - "api-reference/expressions/dt/month"
+ - "api-reference/expressions/dt/second"
+ - "api-reference/expressions/dt/since"
+ - "api-reference/expressions/dt/since_epoch"
+ - "api-reference/expressions/dt/strftime"
+ - "api-reference/expressions/dt/year"
- slug: "api-reference/expressions/list"
title: "List Expressions"
pages:
+ - "api-reference/expressions/list/all"
+ - "api-reference/expressions/list/any"
- "api-reference/expressions/list/at"
- "api-reference/expressions/list/contains"
+ - "api-reference/expressions/list/filter"
- "api-reference/expressions/list/hasnull"
- "api-reference/expressions/list/len"
+ - "api-reference/expressions/list/map"
+ - "api-reference/expressions/list/max"
+ - "api-reference/expressions/list/mean"
+ - "api-reference/expressions/list/min"
+ - "api-reference/expressions/list/sum"
- slug: "api-reference/expressions/num"
title: "Num Expressions"
diff --git a/docs/examples/api-reference/expressions/dt.py b/docs/examples/api-reference/expressions/dt.py
new file mode 100644
index 000000000..206e9d0b9
--- /dev/null
+++ b/docs/examples/api-reference/expressions/dt.py
@@ -0,0 +1,412 @@
+import pytest
+from typing import Optional, List
+import pandas as pd
+from datetime import datetime
+
+
+def test_year():
+ # docsnip year
+ from fennel.expr import col
+
+ # docsnip-highlight next-line
+ expr = col("x").dt.year()
+
+ # year works for any datetime type or optional datetime type
+ assert expr.typeof(schema={"x": datetime}) == int
+ assert expr.typeof(schema={"x": Optional[datetime]}) == Optional[int]
+
+ # can be evaluated with a dataframe
+ df = pd.DataFrame(
+ {
+ "x": [
+ pd.Timestamp("2024-01-01 00:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-01 10:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-01 20:20:00", tz="UTC"),
+ ]
+ }
+ )
+ schema = {"x": datetime}
+ assert expr.eval(df, schema=schema).tolist() == [2024, 2024, 2024]
+
+ # also works with timezone aware datetimes
+ # docsnip-highlight next-line
+ expr = col("x").dt.year(timezone="US/Eastern")
+ assert expr.eval(df, schema=schema).tolist() == [2023, 2024, 2024]
+ # /docsnip
+
+
+def test_month():
+ # docsnip month
+ from fennel.expr import col
+
+ # docsnip-highlight next-line
+ expr = col("x").dt.month()
+
+ # month works for any datetime type or optional datetime type
+ assert expr.typeof(schema={"x": datetime}) == int
+ assert expr.typeof(schema={"x": Optional[datetime]}) == Optional[int]
+
+ # can be evaluated with a dataframe
+ df = pd.DataFrame(
+ {
+ "x": [
+ pd.Timestamp("2024-01-01 00:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-01 10:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-01 20:20:00", tz="UTC"),
+ ]
+ }
+ )
+ schema = {"x": datetime}
+ assert expr.eval(df, schema=schema).tolist() == [1, 1, 1]
+
+ # also works with timezone aware datetimes
+ # docsnip-highlight next-line
+ expr = col("x").dt.month(timezone="US/Eastern")
+ assert expr.eval(df, schema=schema).tolist() == [12, 1, 1]
+ # /docsnip
+
+
+def test_from_epoch():
+ # docsnip from_epoch
+ from fennel.expr import col, from_epoch
+
+ # docsnip-highlight next-line
+ expr = from_epoch(col("x"), unit="second")
+
+ # from_epoch works for any int or optional int type
+ assert expr.typeof(schema={"x": int}) == datetime
+ assert expr.typeof(schema={"x": Optional[int]}) == Optional[datetime]
+
+ # can be evaluated with a dataframe
+ df = pd.DataFrame({"x": [1714857600, 1714857601, 1714857602]})
+ schema = {"x": int}
+ expected = [
+ pd.Timestamp("2024-05-04 21:20:00", tz="UTC"),
+ pd.Timestamp("2024-05-04 21:20:01", tz="UTC"),
+ pd.Timestamp("2024-05-04 21:20:02", tz="UTC"),
+ ]
+ assert expr.eval(df, schema=schema).tolist() == expected
+ # /docsnip
+
+
+def test_day():
+ # docsnip day
+ from fennel.expr import col
+
+ # docsnip-highlight next-line
+ expr = col("x").dt.day()
+
+ # day works for any datetime type or optional datetime type
+ assert expr.typeof(schema={"x": datetime}) == int
+ assert expr.typeof(schema={"x": Optional[datetime]}) == Optional[int]
+
+ # can be evaluated with a dataframe
+ df = pd.DataFrame(
+ {
+ "x": [
+ pd.Timestamp("2024-01-01 00:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-01 10:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-01 20:20:00", tz="UTC"),
+ ]
+ }
+ )
+ schema = {"x": datetime}
+ assert expr.eval(df, schema=schema).tolist() == [1, 1, 1]
+
+ # also works with timezone aware datetimes
+ # docsnip-highlight next-line
+ expr = col("x").dt.day(timezone="US/Eastern")
+ assert expr.eval(df, schema=schema).tolist() == [31, 1, 1]
+ # /docsnip
+
+
+def test_hour():
+ # docsnip hour
+ from fennel.expr import col
+
+ # docsnip-highlight next-line
+ expr = col("x").dt.hour()
+
+ # hour works for any datetime type or optional datetime type
+ assert expr.typeof(schema={"x": datetime}) == int
+ assert expr.typeof(schema={"x": Optional[datetime]}) == Optional[int]
+
+ # can be evaluated with a dataframe
+ df = pd.DataFrame(
+ {
+ "x": [
+ pd.Timestamp("2024-01-01 00:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-01 10:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-01 20:20:00", tz="UTC"),
+ ]
+ }
+ )
+ schema = {"x": datetime}
+ assert expr.eval(df, schema=schema).tolist() == [0, 10, 20]
+
+ # also works with timezone aware datetimes
+ # docsnip-highlight next-line
+ expr = col("x").dt.hour(timezone="US/Eastern")
+ assert expr.eval(df, schema=schema).tolist() == [19, 5, 15]
+ # /docsnip
+
+
+def test_minute():
+ # docsnip minute
+ from fennel.expr import col
+
+ # docsnip-highlight next-line
+ expr = col("x").dt.minute()
+
+ # minute works for any datetime type or optional datetime type
+ assert expr.typeof(schema={"x": datetime}) == int
+ assert expr.typeof(schema={"x": Optional[datetime]}) == Optional[int]
+
+ # can be evaluated with a dataframe
+ df = pd.DataFrame(
+ {
+ "x": [
+ pd.Timestamp("2024-01-01 00:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-01 10:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-01 20:20:00", tz="UTC"),
+ ]
+ }
+ )
+ schema = {"x": datetime}
+ assert expr.eval(df, schema=schema).tolist() == [0, 0, 20]
+
+ # also works with timezone aware datetimes
+ # docsnip-highlight next-line
+ expr = col("x").dt.minute(timezone="US/Eastern")
+ assert expr.eval(df, schema=schema).tolist() == [0, 0, 20]
+ # /docsnip
+
+
+def test_second():
+ # docsnip second
+ from fennel.expr import col
+
+ # docsnip-highlight next-line
+ expr = col("x").dt.second()
+
+ # second works for any datetime type or optional datetime type
+ assert expr.typeof(schema={"x": datetime}) == int
+ assert expr.typeof(schema={"x": Optional[datetime]}) == Optional[int]
+
+ # can be evaluated with a dataframe
+ df = pd.DataFrame(
+ {
+ "x": [
+ pd.Timestamp("2024-01-01 00:00:01", tz="UTC"),
+ pd.Timestamp("2024-01-01 10:00:02", tz="UTC"),
+ pd.Timestamp("2024-01-01 20:20:03", tz="UTC"),
+ ]
+ }
+ )
+ schema = {"x": datetime}
+ assert expr.eval(df, schema=schema).tolist() == [1, 2, 3]
+
+ # also works with timezone aware datetimes
+ # docsnip-highlight next-line
+ expr = col("x").dt.second(timezone="Asia/Kathmandu")
+ assert expr.eval(df, schema=schema).tolist() == [1, 2, 3]
+ # /docsnip
+
+
+def test_since_epoch():
+ # docsnip since_epoch
+ from fennel.expr import col
+
+ # docsnip-highlight next-line
+ expr = col("x").dt.since_epoch()
+
+ # since_epoch works for any datetime type or optional datetime type
+ assert expr.typeof(schema={"x": datetime}) == int
+ assert expr.typeof(schema={"x": Optional[datetime]}) == Optional[int]
+
+ # can be evaluated with a dataframe
+ df = pd.DataFrame(
+ {
+ "x": [
+ pd.Timestamp("2024-01-01 00:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-01 10:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-01 20:20:00", tz="UTC"),
+ ]
+ }
+ )
+ schema = {"x": datetime}
+ expected = [1704067200, 1704103200, 1704140400]
+ assert expr.eval(df, schema=schema).tolist() == expected
+
+ # can also change the unit of time
+ # docsnip-highlight next-line
+ expr = col("x").dt.since_epoch(unit="minute")
+ assert expr.eval(df, schema=schema).tolist() == [
+ 28401120,
+ 28401720,
+ 28402340,
+ ]
+ # /docsnip
+
+ expr = col("x").dt.since_epoch(unit="day")
+ assert expr.eval(df, schema=schema).tolist() == [
+ 19723,
+ 19723,
+ 19723,
+ ]
+
+ expr = col("x").dt.since_epoch(unit="hour")
+ assert expr.eval(df, schema=schema).tolist() == [
+ 473352,
+ 473362,
+ 473372,
+ ]
+ expr = col("x").dt.since_epoch(unit="millisecond")
+ assert expr.eval(df, schema=schema).tolist() == [
+ 1704067200000,
+ 1704103200000,
+ 1704140400000,
+ ]
+
+ expr = col("x").dt.since_epoch(unit="microsecond")
+ assert expr.eval(df, schema=schema).tolist() == [
+ 1704067200000000,
+ 1704103200000000,
+ 1704140400000000,
+ ]
+
+ expr = col("x").dt.since_epoch(unit="week")
+ assert expr.eval(df, schema=schema).tolist() == [
+ 2817,
+ 2817,
+ 2817,
+ ]
+
+ with pytest.raises(ValueError):
+ col("x").dt.since_epoch(unit="nanosecond")
+
+
+def test_since():
+ # docsnip since
+ from fennel.expr import col
+
+ # docsnip-highlight next-line
+ expr = col("x").dt.since(col("y"))
+
+ # since works for any datetime type or optional datetime type
+ assert expr.typeof(schema={"x": datetime, "y": datetime}) == int
+ assert (
+ expr.typeof(schema={"x": Optional[datetime], "y": datetime})
+ == Optional[int]
+ )
+
+ # can be evaluated with a dataframe
+ df = pd.DataFrame(
+ {
+ "x": [
+ pd.Timestamp("2024-01-01 00:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-01 10:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-01 20:20:00", tz="UTC"),
+ ],
+ "y": [
+ pd.Timestamp("2023-01-01 00:00:00", tz="UTC"),
+ pd.Timestamp("2023-01-02 10:00:00", tz="UTC"),
+ pd.Timestamp("2023-01-03 20:20:00", tz="UTC"),
+ ],
+ }
+ )
+ schema = {"x": datetime, "y": datetime}
+ expected = [31536000, 31449600, 31363200]
+ assert expr.eval(df, schema=schema).tolist() == expected
+
+ # can also change the unit of time
+ # docsnip-highlight next-line
+ expr = col("x").dt.since(col("y"), unit="minute")
+ assert expr.eval(df, schema=schema).tolist() == [
+ 525600,
+ 524160,
+ 522720,
+ ]
+ # /docsnip
+
+ expr = col("x").dt.since(col("y"), unit="day")
+ assert expr.eval(df, schema=schema).tolist() == [
+ 365,
+ 364,
+ 363,
+ ]
+
+ expr = col("x").dt.since(col("y"), unit="hour")
+ assert expr.eval(df, schema=schema).tolist() == [
+ 8760,
+ 8736,
+ 8712,
+ ]
+
+
+def test_strftime():
+ # docsnip strftime
+ from fennel.expr import col
+
+ # docsnip-highlight next-line
+ expr = col("x").dt.strftime("%Y-%m-%d")
+
+ # strftime works for any datetime type or optional datetime type
+ assert expr.typeof(schema={"x": datetime}) == str
+ assert expr.typeof(schema={"x": Optional[datetime]}) == Optional[str]
+
+ # can be evaluated with a dataframe
+ df = pd.DataFrame(
+ {
+ "x": [
+ pd.Timestamp("2024-01-01 00:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-02 10:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-03 20:20:00", tz="UTC"),
+ ]
+ }
+ )
+ schema = {"x": datetime}
+ assert expr.eval(df, schema=schema).tolist() == [
+ "2024-01-01",
+ "2024-01-02",
+ "2024-01-03",
+ ]
+
+ # also works with timezone aware datetimes
+ # docsnip-highlight next-line
+ expr = col("x").dt.strftime("%Y-%m-%d", timezone="US/Eastern")
+ assert expr.eval(df, schema=schema).tolist() == [
+ "2023-12-31",
+ "2024-01-02",
+ "2024-01-03",
+ ]
+ # /docsnip
+
+
+def test_datetime():
+ # docsnip datetime
+ # docsnip-highlight next-line
+ from fennel.expr import datetime as dt
+
+ # docsnip-highlight next-line
+ expr = dt(year=2024, month=1, day=1)
+
+    # returns a constant expression of type datetime
+ assert expr.typeof() == datetime
+
+ # can be evaluated with a dataframe
+ df = pd.DataFrame({"dummy": [1, 2, 3]})
+ assert expr.eval(df, schema={"dummy": int}).tolist() == [
+ pd.Timestamp("2024-01-01 00:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-01 00:00:00", tz="UTC"),
+ pd.Timestamp("2024-01-01 00:00:00", tz="UTC"),
+ ]
+ # can provide timezone
+ # docsnip-highlight next-line
+ expr = dt(year=2024, month=1, day=1, timezone="US/Eastern")
+ assert expr.eval(df, schema={"dummy": int}).tolist() == [
+ pd.Timestamp("2024-01-01 00:00:00", tz="US/Eastern"),
+ pd.Timestamp("2024-01-01 00:00:00", tz="US/Eastern"),
+ pd.Timestamp("2024-01-01 00:00:00", tz="US/Eastern"),
+ ]
+ # /docsnip
diff --git a/docs/examples/api-reference/expressions/list.py b/docs/examples/api-reference/expressions/list.py
index d635440f0..acb1adf6b 100644
--- a/docs/examples/api-reference/expressions/list.py
+++ b/docs/examples/api-reference/expressions/list.py
@@ -136,3 +136,199 @@ def test_at_negative():
schema = {"x": Optional[List[Optional[int]]], "y": int}
assert expr.eval(df, schema=schema).tolist() == [3, pd.NA, 5, pd.NA]
# /docsnip
+
+
+def test_list_sum():
+ # docsnip sum
+ from fennel.expr import col
+
+ # docsnip-highlight next-line
+ expr = col("x").list.sum()
+
+ # works for lists of int/float or their optional versions
+ assert expr.typeof(schema={"x": List[int]}) == int
+ assert expr.typeof(schema={"x": Optional[List[float]]}) == Optional[float]
+
+ with pytest.raises(Exception):
+ expr.typeof(schema={"x": List[str]})
+
+ # can be evaluated as well
+ df = pd.DataFrame({"x": [[1, 2, 3], [4, 5, None], [], None]})
+ schema = {"x": Optional[List[Optional[int]]]}
+ assert expr.eval(df, schema=schema).tolist() == [6, pd.NA, 0, pd.NA]
+ # /docsnip
+
+
+def test_list_min():
+ # docsnip min
+ from fennel.expr import col
+
+ # docsnip-highlight next-line
+ expr = col("x").list.min()
+
+ # works for lists of int/float or their optional versions
+ assert expr.typeof(schema={"x": List[int]}) == Optional[int]
+ assert expr.typeof(schema={"x": Optional[List[float]]}) == Optional[float]
+
+ with pytest.raises(Exception):
+ expr.typeof(schema={"x": List[str]})
+
+ # can be evaluated as well
+ df = pd.DataFrame({"x": [[1, 2, 3], [4, 5, None], [], None]})
+ schema = {"x": Optional[List[Optional[int]]]}
+ assert expr.eval(df, schema=schema).tolist() == [1, pd.NA, pd.NA, pd.NA]
+ # /docsnip
+
+
+def test_list_max():
+ # docsnip max
+ from fennel.expr import col
+
+ # docsnip-highlight next-line
+ expr = col("x").list.max()
+
+ # works for lists of int/float or their optional versions
+ assert expr.typeof(schema={"x": List[int]}) == Optional[int]
+ assert expr.typeof(schema={"x": Optional[List[float]]}) == Optional[float]
+
+ with pytest.raises(Exception):
+ expr.typeof(schema={"x": List[str]})
+
+ # can be evaluated as well
+ df = pd.DataFrame({"x": [[1, 2, 3], [4, 5, None], [], None]})
+ schema = {"x": Optional[List[Optional[int]]]}
+ assert expr.eval(df, schema=schema).tolist() == [3, pd.NA, pd.NA, pd.NA]
+ # /docsnip
+
+
+def test_list_mean():
+ # docsnip mean
+ from fennel.expr import col
+
+ # docsnip-highlight next-line
+ expr = col("x").list.mean()
+
+ # works for lists of int/float or their optional versions
+ assert expr.typeof(schema={"x": List[int]}) == Optional[float]
+ assert expr.typeof(schema={"x": Optional[List[float]]}) == Optional[float]
+
+ with pytest.raises(Exception):
+ expr.typeof(schema={"x": List[str]})
+
+ # can be evaluated as well
+ df = pd.DataFrame({"x": [[1, 2, 3], [4, 5, None], [], None]})
+ schema = {"x": Optional[List[Optional[int]]]}
+ assert expr.eval(df, schema=schema).tolist() == [2.0, pd.NA, pd.NA, pd.NA]
+ # /docsnip
+
+
+def test_list_all():
+ # docsnip all
+ from fennel.expr import col
+
+ # docsnip-highlight next-line
+ expr = col("x").list.all()
+
+    # works for lists of bool or their optional versions
+ assert expr.typeof(schema={"x": List[bool]}) == bool
+ assert expr.typeof(schema={"x": List[Optional[bool]]}) == Optional[bool]
+ assert (
+ expr.typeof(schema={"x": Optional[List[Optional[bool]]]})
+ == Optional[bool]
+ )
+
+ with pytest.raises(Exception):
+ expr.typeof(schema={"x": List[str]})
+
+ # can be evaluated as well
+ df = pd.DataFrame(
+ {"x": [[True, True], [True, False], [], None, [True, None]]}
+ )
+ schema = {"x": Optional[List[Optional[bool]]]}
+ assert expr.eval(df, schema=schema).tolist() == [
+ True,
+ False,
+ True,
+ pd.NA,
+ pd.NA,
+ ]
+ # /docsnip
+
+
+def test_list_any():
+ # docsnip any
+ from fennel.expr import col
+
+ # docsnip-highlight next-line
+ expr = col("x").list.any()
+
+    # works for lists of bool or their optional versions
+ assert expr.typeof(schema={"x": List[bool]}) == bool
+ assert expr.typeof(schema={"x": List[Optional[bool]]}) == Optional[bool]
+ assert (
+ expr.typeof(schema={"x": Optional[List[Optional[bool]]]})
+ == Optional[bool]
+ )
+
+ with pytest.raises(Exception):
+ expr.typeof(schema={"x": List[str]})
+
+ # can be evaluated as well
+ df = pd.DataFrame(
+ {"x": [[True, True], [True, False], [], None, [True, None]]}
+ )
+ schema = {"x": Optional[List[Optional[bool]]]}
+ assert expr.eval(df, schema=schema).tolist() == [
+ True,
+ True,
+ False,
+ pd.NA,
+ True,
+ ]
+ # /docsnip
+
+
+def test_list_filter():
+ # docsnip filter
+ from fennel.expr import col, var
+
+ # docsnip-highlight next-line
+ expr = col("x").list.filter("x", var("x") % 2 == 0)
+
+ # works as long as predicate is valid and evaluates to bool
+ assert expr.typeof(schema={"x": List[int]}) == List[int]
+ assert expr.typeof(schema={"x": List[float]}) == List[float]
+
+ with pytest.raises(Exception):
+ expr.typeof(schema={"x": List[str]})
+
+ # can be evaluated as well
+ df = pd.DataFrame({"x": [[1, 2, 3], [], [1, 2, -2], None, [1, 3]]})
+ schema = {"x": Optional[List[int]]}
+ assert expr.eval(df, schema=schema).tolist() == [
+ [2],
+ [],
+ [2, -2],
+ pd.NA,
+ [],
+ ]
+ # /docsnip
+
+
+def test_list_map():
+ # docsnip map
+ from fennel.expr import col, var
+
+ # docsnip-highlight next-line
+ expr = col("x").list.map("x", var("x") % 2)
+
+ # works as long as predicate is valid
+ assert expr.typeof(schema={"x": List[int]}) == List[int]
+ assert expr.typeof(schema={"x": List[Optional[int]]}) == List[Optional[int]]
+
+ # can be evaluated as well
+ df = pd.DataFrame({"x": [[1, 2, 3], [], [1, 2, None], None, [1, 3]]})
+ schema = {"x": Optional[List[Optional[int]]]}
+ expected = [[1, 0, 1], [], [1, 0, pd.NA], pd.NA, [1, 1]]
+ assert expr.eval(df, schema=schema).tolist() == expected
+ # /docsnip
diff --git a/docs/pages/api-reference/expressions/datetime.md b/docs/pages/api-reference/expressions/datetime.md
new file mode 100644
index 000000000..aa8e8d0ff
--- /dev/null
+++ b/docs/pages/api-reference/expressions/datetime.md
@@ -0,0 +1,78 @@
+---
+title: Datetime
+order: 0
+status: published
+---
+
+### Datetime
+
+Function to get a constant datetime object from its constituent parts.
+
+#### Parameters
+
+The year of the datetime. Note that this must be an integer, not an
+expression denoting an integer.
+
+
+
+The month of the datetime. Note that this must be an integer, not an
+expression denoting an integer.
+
+
+
+The day of the datetime. Note that this must be an integer, not an
+expression denoting an integer.
+
+
+
+The hour of the datetime. Note that this must be an integer, not an
+expression denoting an integer.
+
+
+
+
+The minute of the datetime. Note that this must be an integer, not an
+expression denoting an integer.
+
+
+
+The second of the datetime. Note that this must be an integer, not an
+expression denoting an integer.
+
+
+
+The millisecond of the datetime. Note that this must be an integer, not an
+expression denoting an integer.
+
+
+
+The microsecond of the datetime. Note that this must be an integer, not an
+expression denoting an integer.
+
+
+
+The timezone of the datetime. Note that this must be a string denoting a valid
+timezone, not an expression denoting a string.
+
+
+#### Returns
+
+Returns an expression object denoting the datetime object.
+
+
+
+
+
+
+#### Errors
+
+The month must be between 1 and 12, the day must be between 1 and 31, the hour
+must be between 0 and 23, the minute must be between 0 and 59, the second must be
+between 0 and 59, the millisecond must be between 0 and 999, and the
+microsecond must be between 0 and 999.
+
+Timezone, if provided, must be a valid timezone string. Note that Fennel only
+supports area/location based timezones (e.g. "America/New_York"), not fixed
+offsets (e.g. "+05:30" or "UTC+05:30").
+
\ No newline at end of file
diff --git a/docs/pages/api-reference/expressions/dt/day.md b/docs/pages/api-reference/expressions/dt/day.md
new file mode 100644
index 000000000..a0a72d47a
--- /dev/null
+++ b/docs/pages/api-reference/expressions/dt/day.md
@@ -0,0 +1,37 @@
+---
+title: Day
+order: 0
+status: published
+---
+
+### Day
+
+Function to get the day component of a datetime object.
+
+#### Parameters
+
+The timezone in which to interpret the datetime. If not specified, UTC is used.
+
+
+#### Returns
+
+Returns an expression object denoting the integer value of the day of the
+datetime object.
+
+
+
+
+
+
+#### Errors
+
+The `dt` namespace must be invoked on an expression that evaluates to datetime
+or optional of datetime.
+
+
+
+The timezone, if provided, must be a valid timezone string. Note that Fennel
+only supports area/location based timezones (e.g. "America/New_York"), not
+fixed offsets (e.g. "+05:30" or "UTC+05:30").
+
\ No newline at end of file
diff --git a/docs/pages/api-reference/expressions/dt/hour.md b/docs/pages/api-reference/expressions/dt/hour.md
new file mode 100644
index 000000000..2eb61e5b1
--- /dev/null
+++ b/docs/pages/api-reference/expressions/dt/hour.md
@@ -0,0 +1,37 @@
+---
+title: Hour
+order: 0
+status: published
+---
+
+### Hour
+
+Function to get the hour component of a datetime object.
+
+#### Parameters
+
+The timezone in which to interpret the datetime. If not specified, UTC is used.
+
+
+#### Returns
+
+Returns an expression object denoting the integer value of the hour of the
+datetime object.
+
+
+
+
+
+
+#### Errors
+
+The `dt` namespace must be invoked on an expression that evaluates to datetime
+or optional of datetime.
+
+
+
+The timezone, if provided, must be a valid timezone string. Note that Fennel
+only supports area/location based timezones (e.g. "America/New_York"), not
+fixed offsets (e.g. "+05:30" or "UTC+05:30").
+
\ No newline at end of file
diff --git a/docs/pages/api-reference/expressions/dt/minute.md b/docs/pages/api-reference/expressions/dt/minute.md
new file mode 100644
index 000000000..0e0e366b3
--- /dev/null
+++ b/docs/pages/api-reference/expressions/dt/minute.md
@@ -0,0 +1,37 @@
+---
+title: Minute
+order: 0
+status: published
+---
+
+### Minute
+
+Function to get the minute component of a datetime object.
+
+#### Parameters
+
+The timezone in which to interpret the datetime. If not specified, UTC is used.
+
+
+#### Returns
+
+Returns an expression object denoting the integer value of the minute of the
+datetime object.
+
+
+
+
+
+
+#### Errors
+
+The `dt` namespace must be invoked on an expression that evaluates to datetime
+or optional of datetime.
+
+
+
+The timezone, if provided, must be a valid timezone string. Note that Fennel
+only supports area/location based timezones (e.g. "America/New_York"), not
+fixed offsets (e.g. "+05:30" or "UTC+05:30").
+
\ No newline at end of file
diff --git a/docs/pages/api-reference/expressions/dt/month.md b/docs/pages/api-reference/expressions/dt/month.md
new file mode 100644
index 000000000..d56cbd85a
--- /dev/null
+++ b/docs/pages/api-reference/expressions/dt/month.md
@@ -0,0 +1,37 @@
+---
+title: Month
+order: 0
+status: published
+---
+
+### Month
+
+Function to get the month component of a datetime object.
+
+#### Parameters
+
+The timezone in which to interpret the datetime. If not specified, UTC is used.
+
+
+#### Returns
+
+Returns an expression object denoting the integer value of the month of the
+datetime object.
+
+
+
+
+
+
+#### Errors
+
+The `dt` namespace must be invoked on an expression that evaluates to datetime
+or optional of datetime.
+
+
+
+The timezone, if provided, must be a valid timezone string. Note that Fennel
+only supports area/location based timezones (e.g. "America/New_York"), not
+fixed offsets (e.g. "+05:30" or "UTC+05:30").
+
\ No newline at end of file
diff --git a/docs/pages/api-reference/expressions/dt/second.md b/docs/pages/api-reference/expressions/dt/second.md
new file mode 100644
index 000000000..6cf4a35c9
--- /dev/null
+++ b/docs/pages/api-reference/expressions/dt/second.md
@@ -0,0 +1,37 @@
+---
+title: Second
+order: 0
+status: published
+---
+
+### Second
+
+Function to get the second component of a datetime object.
+
+#### Parameters
+
+The timezone in which to interpret the datetime. If not specified, UTC is used.
+
+
+#### Returns
+
+Returns an expression object denoting the integer value of the second of the
+datetime object.
+
+
+
+
+
+
+#### Errors
+
+The `dt` namespace must be invoked on an expression that evaluates to datetime
+or optional of datetime.
+
+
+
+The timezone, if provided, must be a valid timezone string. Note that Fennel
+only supports area/location based timezones (e.g. "America/New_York"), not
+fixed offsets (e.g. "+05:30" or "UTC+05:30").
+
\ No newline at end of file
diff --git a/docs/pages/api-reference/expressions/dt/since.md b/docs/pages/api-reference/expressions/dt/since.md
new file mode 100644
index 000000000..9e555935c
--- /dev/null
+++ b/docs/pages/api-reference/expressions/dt/since.md
@@ -0,0 +1,36 @@
+---
+title: Since
+order: 0
+status: published
+---
+
+### Since
+
+Function to get the time elapsed between two datetime objects.
+
+#### Parameters
+
+The datetime object to calculate the elapsed time since.
+
+
+
+The unit of time to return the elapsed time in. Defaults to seconds. Valid units
+are: `week`, `day`, `hour`, `minute`, `second`, `millisecond`, and `microsecond`.
+
+
+#### Returns
+
+Returns an expression object denoting the integer value of the elapsed time
+since the specified datetime object in the specified unit.
+
+
+
+
+
+
+#### Errors
+
+The `dt` namespace must be invoked on an expression that evaluates to datetime
+or optional of datetime.
+
diff --git a/docs/pages/api-reference/expressions/dt/since_epoch.md b/docs/pages/api-reference/expressions/dt/since_epoch.md
new file mode 100644
index 000000000..eae2a394b
--- /dev/null
+++ b/docs/pages/api-reference/expressions/dt/since_epoch.md
@@ -0,0 +1,32 @@
+---
+title: Since Epoch
+order: 0
+status: published
+---
+
+### Since Epoch
+
+Function to get the time elapsed since epoch for a datetime object.
+
+#### Parameters
+
+The unit of time to return the elapsed time in. Defaults to seconds. Valid units
+are: `week`, `day`, `hour`, `minute`, `second`, `millisecond`, and `microsecond`.
+
+
+#### Returns
+
+Returns an expression object denoting the integer value of the elapsed time
+since epoch for the datetime object in the specified unit.
+
+
+
+
+
+
+#### Errors
+
+The `dt` namespace must be invoked on an expression that evaluates to datetime
+or optional of datetime.
+
diff --git a/docs/pages/api-reference/expressions/dt/strftime.md b/docs/pages/api-reference/expressions/dt/strftime.md
new file mode 100644
index 000000000..27c5a7c06
--- /dev/null
+++ b/docs/pages/api-reference/expressions/dt/strftime.md
@@ -0,0 +1,44 @@
+---
+title: Strftime
+order: 0
+status: published
+---
+
+### Strftime
+
+Function to format a datetime object as a string.
+
+#### Parameters
+
+The format string to use for the datetime.
+
+
+
+The timezone in which to interpret the datetime. If not specified, UTC is used.
+
+
+#### Returns
+
+Returns an expression object denoting the formatted datetime string.
+
+
+
+
+
+
+#### Errors
+
+The `dt` namespace must be invoked on an expression that evaluates to datetime
+or optional of datetime.
+
+
+
+The format string must be a valid format string.
+
+
+
+The timezone must be a valid timezone. Note that Fennel only supports timezones
+with area/location names (e.g. `America/New_York`) and not timezones with offsets
+(e.g. `+05:00`).
+
diff --git a/docs/pages/api-reference/expressions/dt/year.md b/docs/pages/api-reference/expressions/dt/year.md
new file mode 100644
index 000000000..9a5294b86
--- /dev/null
+++ b/docs/pages/api-reference/expressions/dt/year.md
@@ -0,0 +1,37 @@
+---
+title: Year
+order: 0
+status: published
+---
+
+### Year
+
+Function to get the year component of a datetime object.
+
+#### Parameters
+
+The timezone in which to interpret the datetime. If not specified, UTC is used.
+
+
+#### Returns
+
+Returns an expression object denoting the integer value of the year of the
+datetime object.
+
+
+
+
+
+
+#### Errors
+
+The `dt` namespace must be invoked on an expression that evaluates to datetime
+or optional of datetime.
+
+
+
+The timezone, if provided, must be a valid timezone string. Note that Fennel
+only supports area/location based timezones (e.g. "America/New_York"), not
+fixed offsets (e.g. "+05:30" or "UTC+05:30").
+
\ No newline at end of file
diff --git a/docs/pages/api-reference/expressions/from_epoch.md b/docs/pages/api-reference/expressions/from_epoch.md
new file mode 100644
index 000000000..b8695b208
--- /dev/null
+++ b/docs/pages/api-reference/expressions/from_epoch.md
@@ -0,0 +1,29 @@
+---
+title: From Epoch
+order: 0
+status: published
+---
+
+### From Epoch
+
+Function to get a datetime object from a unix timestamp.
+
+#### Parameters
+
+The duration (in units as specified by `unit`) since epoch to convert to a datetime
+in the form of an expression denoting an integer.
+
+
+
+The unit of the `duration` parameter. Can be one of `second`, `millisecond`,
+or `microsecond`. Defaults to `second`.
+
+
+#### Returns
+
+Returns an expression object denoting the datetime object.
+
+
+
+
\ No newline at end of file
diff --git a/docs/pages/api-reference/expressions/list/all.md b/docs/pages/api-reference/expressions/list/all.md
new file mode 100644
index 000000000..f5538ff60
--- /dev/null
+++ b/docs/pages/api-reference/expressions/list/all.md
@@ -0,0 +1,30 @@
+---
+title: All
+order: 0
+status: published
+---
+
+### All
+
+Function to check if all the elements in a boolean list are `True`.
+
+#### Returns
+
+Returns an expression object denoting the result of the `all` operation.
+
+Only works when the list is of type bool or Optional[bool]. For an empty list,
+returns an expression denoting `True`. If the list has one or more `None`
+elements, the result becomes `None`.
+
+
+
+
+
+
+#### Errors
+
+The `list` namespace must be invoked on an expression that evaluates to list
+or optional of list. `All` can only be invoked on lists of bools (or
+optionals of bool).
+
diff --git a/docs/pages/api-reference/expressions/list/any.md b/docs/pages/api-reference/expressions/list/any.md
new file mode 100644
index 000000000..2d9a20eda
--- /dev/null
+++ b/docs/pages/api-reference/expressions/list/any.md
@@ -0,0 +1,31 @@
+---
+title: Any
+order: 0
+status: published
+---
+
+### Any
+
+Function to check if a boolean list contains any `True` value.
+
+#### Returns
+
+Returns an expression object denoting the result of the `any` operation.
+
+Only works when the list is of type bool (or optional bool). For
+an empty list, returns an expression denoting `False`. If the list has one or more
+`None` elements, the result becomes `None` unless it also has `True` in which case
+the result is still `True`.
+
+
+
+
+
+
+#### Errors
+
+The `list` namespace must be invoked on an expression that evaluates to list
+or optional of list. `Any` can only be invoked on lists of bool (or
+optionals of bool).
+
diff --git a/docs/pages/api-reference/expressions/list/filter.md b/docs/pages/api-reference/expressions/list/filter.md
new file mode 100644
index 000000000..ed2b59ee1
--- /dev/null
+++ b/docs/pages/api-reference/expressions/list/filter.md
@@ -0,0 +1,38 @@
+---
+title: Filter
+order: 0
+status: published
+---
+
+### Filter
+
+Function to filter a list down to elements satisfying a predicate.
+
+#### Parameters
+
+The variable name to which each element of the list should be bound to
+one-by-one.
+
+
+
+The predicate expression to be used to filter the list down. This must
+evaluate to bool for each element of the list. Note that this expression can
+refer to the element under consideration via `var(name)` where name is the
+first argument given to the `filter` operation (see example for details).
+
+
+#### Returns
+
+Returns an expression object denoting the filtered list.
+
+
+
+
+
+
+#### Errors
+
+The `list` namespace must be invoked on an expression that evaluates to list
+or optional of list.
+
\ No newline at end of file
diff --git a/docs/pages/api-reference/expressions/list/map.md b/docs/pages/api-reference/expressions/list/map.md
new file mode 100644
index 000000000..8b99c08ca
--- /dev/null
+++ b/docs/pages/api-reference/expressions/list/map.md
@@ -0,0 +1,38 @@
+---
+title: Map
+order: 0
+status: published
+---
+
+### Map
+
+Function to map each element of a list to get another list of the same size.
+
+#### Parameters
+
+The variable name to which each element of the list should be bound to
+one-by-one.
+
+
+
+The expression to be used to transform each element of the list. Note that
+this expression can refer to the element under consideration via `var(name)`
+where name is the first argument given to the `map` operation (see example for
+details).
+
+
+#### Returns
+
+Returns an expression object denoting the transformed list.
+
+
+
+
+
+
+#### Errors
+
+The `list` namespace must be invoked on an expression that evaluates to list
+or optional of list.
+
\ No newline at end of file
diff --git a/docs/pages/api-reference/expressions/list/max.md b/docs/pages/api-reference/expressions/list/max.md
new file mode 100644
index 000000000..ad59cc42a
--- /dev/null
+++ b/docs/pages/api-reference/expressions/list/max.md
@@ -0,0 +1,30 @@
+---
+title: Max
+order: 0
+status: published
+---
+
+### Max
+
+Function to get the maximum value of a list.
+
+#### Returns
+
+Returns an expression object denoting the max value of a list.
+
+Only works when the list is of type int/float (or their optional versions). For
+an empty list, returns an expression denoting `None`. If the list has one or more
+`None` elements, the result becomes `None`.
+
+
+
+
+
+
+#### Errors
+
+The `list` namespace must be invoked on an expression that evaluates to list
+or optional of list. `Max` can only be invoked on lists of ints/floats (or
+optionals of ints/floats).
+
\ No newline at end of file
diff --git a/docs/pages/api-reference/expressions/list/mean.md b/docs/pages/api-reference/expressions/list/mean.md
new file mode 100644
index 000000000..c00a0363a
--- /dev/null
+++ b/docs/pages/api-reference/expressions/list/mean.md
@@ -0,0 +1,33 @@
+---
+title: Mean
+order: 0
+status: published
+---
+
+### Mean
+
+Function to get the mean of the values of a list.
+
+#### Returns
+
+Returns an expression object denoting the mean value of a list.
+
+Only works when the list is of type int/float (or their optional versions). For
+an empty list, returns an expression denoting `None`. If the list has one or more
+`None` elements, the result becomes `None`.
+
+The output type of this expression is either `float` or `Optional[float]` depending
+on the inputs.
+
+
+
+
+
+
+#### Errors
+
+The `list` namespace must be invoked on an expression that evaluates to list
+or optional of list. `Mean` can only be invoked on lists of ints/floats (or
+optionals of ints/floats).
+
diff --git a/docs/pages/api-reference/expressions/list/min.md b/docs/pages/api-reference/expressions/list/min.md
new file mode 100644
index 000000000..6f719ac9d
--- /dev/null
+++ b/docs/pages/api-reference/expressions/list/min.md
@@ -0,0 +1,30 @@
+---
+title: Min
+order: 0
+status: published
+---
+
+### Min
+
+Function to get the minimum value of a list.
+
+#### Returns
+
+Returns an expression object denoting the min value of a list.
+
+Only works when the list is of type int/float (or their optional versions). For
+an empty list, returns an expression denoting `None`. If the list has one or more
+`None` elements, the result becomes `None`.
+
+
+
+
+
+
+#### Errors
+
+The `list` namespace must be invoked on an expression that evaluates to list
+or optional of list. `Min` can only be invoked on lists of ints/floats (or
+optionals of ints/floats).
+
\ No newline at end of file
diff --git a/docs/pages/api-reference/expressions/list/sum.md b/docs/pages/api-reference/expressions/list/sum.md
new file mode 100644
index 000000000..1eb946036
--- /dev/null
+++ b/docs/pages/api-reference/expressions/list/sum.md
@@ -0,0 +1,30 @@
+---
+title: Sum
+order: 0
+status: published
+---
+
+### Sum
+
+Function to get the sum of values of a list.
+
+#### Returns
+
+Returns an expression object denoting the sum of the values of the list.
+
+Only works when the list is of type int/float (or their optional versions). For
+an empty list, returns an expression denoting `0`. If the list has one or more
+`None` elements, the whole sum becomes `None`.
+
+
+
+
+
+
+#### Errors
+
+The `list` namespace must be invoked on an expression that evaluates to list
+or optional of list. `Sum` can only be invoked on lists of ints/floats (or
+optionals of ints/floats).
+
\ No newline at end of file
diff --git a/fennel/connectors/test_invalid_connectors.py b/fennel/connectors/test_invalid_connectors.py
index c60f7a749..dc9d271c4 100644
--- a/fennel/connectors/test_invalid_connectors.py
+++ b/fennel/connectors/test_invalid_connectors.py
@@ -940,6 +940,6 @@ class UserInfoDataset:
client.commit(datasets=[UserInfoDataset], message="test")
assert (
- "`age` is of type `int` in Dataset `UserInfoDataset`, can not be cast to `float`. Full expression: `col('val1')`"
+ '`age` is of type `int` in Dataset `UserInfoDataset`, can not be cast to `float`. Full expression: `col("val1")`'
== str(e.value)
)
diff --git a/fennel/datasets/test_invalid_dataset.py b/fennel/datasets/test_invalid_dataset.py
index 556e7d704..f4a3f8c9e 100644
--- a/fennel/datasets/test_invalid_dataset.py
+++ b/fennel/datasets/test_invalid_dataset.py
@@ -247,7 +247,7 @@ def transform(cls, rating: Dataset):
.astype(int),
).drop("rating", "movie")
- expected_err = "'movie_suffixed' is expected to be of type `int`, but evaluates to `str`. Full expression: `col('movie') + \"_suffix\"`"
+ expected_err = '\'movie_suffixed\' is expected to be of type `int`, but evaluates to `str`. Full expression: `col("movie") + "_suffix"`'
assert expected_err in str(e.value)
with pytest.raises(TypeError) as e2:
@@ -295,7 +295,7 @@ def transform(cls, rating: Dataset):
assert (
str(e2.value)
- == """invalid assign - '[Pipeline:transform]->assign node' error in expression for column `movie_suffixed`: Failed to compile expression: invalid expression: both sides of '+' must be numeric types but found String & String, left: col(movie), right: lit(String("_suffix"))"""
+ == """invalid assign - '[Pipeline:transform]->assign node' error in expression for column `movie_suffixed`: Failed to compile expression: invalid expression: both sides of '+' must be numeric types but found String & String, left: col("movie"), right: lit(String("_suffix"))"""
)
diff --git a/fennel/expr/__init__.py b/fennel/expr/__init__.py
index 1a7e235a1..362f199b5 100644
--- a/fennel/expr/__init__.py
+++ b/fennel/expr/__init__.py
@@ -3,6 +3,8 @@
lit,
when,
make_struct,
+ var,
+ datetime,
from_epoch,
Expr,
InvalidExprException,
diff --git a/fennel/expr/expr.py b/fennel/expr/expr.py
index fcf037482..c926a6902 100644
--- a/fennel/expr/expr.py
+++ b/fennel/expr/expr.py
@@ -430,6 +430,14 @@ def __str__(self) -> str:
return f"{self.expr}"
+class Var(Expr):
+ def __init__(self, var: str):
+ self.var = var
+
+ def __str__(self) -> str:
+ return f"var({self.var})"
+
+
#########################################################
# Math Functions
#########################################################
@@ -694,6 +702,7 @@ def from_string(time_unit_str: str | TimeUnit) -> TimeUnit:
@dataclass
class DateTimeParts(DateTimeOp):
part: TimeUnit
+ timezone: Optional[str]
@dataclass
@@ -710,6 +719,7 @@ class DateTimeSinceEpoch(DateTimeOp):
@dataclass
class DateTimeStrftime(DateTimeOp):
format: str
+ timezone: Optional[str]
@dataclass
@@ -718,65 +728,67 @@ class DateTimeFromEpoch(Expr):
unit: TimeUnit
+@dataclass
+class DateTimeLiteral(DateTimeOp):
+ year: int
+ month: int
+ day: int
+ hour: int
+ minute: int
+ second: int
+ microsecond: int
+ timezone: Optional[str]
+
+
class _DateTime(Expr):
def __init__(self, expr: Expr, op: DateTimeOp):
self.op = op
self.operand = expr
super(_DateTime, self).__init__()
- def parts(self, part: TimeUnit) -> _Number:
+ def parts(self, part: TimeUnit, timezone: Optional[str] = "UTC") -> _Number:
part = TimeUnit.from_string(part)
- return _Number(_DateTime(self, DateTimeParts(part)), MathNoop())
+ return _Number(
+ _DateTime(self, DateTimeParts(part, timezone)), MathNoop()
+ )
- def since(self, other: Expr, unit: TimeUnit) -> _Number:
+ def since(self, other: Expr, unit: TimeUnit = "second") -> _Number:
unit = TimeUnit.from_string(unit)
other_expr = make_expr(other)
return _Number(
_DateTime(self, DateTimeSince(other_expr, unit)), MathNoop()
)
- def since_epoch(self, unit: TimeUnit) -> _Number:
+ def since_epoch(self, unit: TimeUnit = "second") -> _Number:
unit = TimeUnit.from_string(unit)
return _Number(_DateTime(self, DateTimeSinceEpoch(unit)), MathNoop())
- def strftime(self, format: str) -> _String:
- return _String(_DateTime(self, DateTimeStrftime(format)), StringNoop())
-
- @property
- def year(self) -> _Number:
- return self.parts(TimeUnit.YEAR)
-
- @property
- def month(self) -> _Number:
- return self.parts(TimeUnit.MONTH)
+ def strftime(self, format: str, timezone: Optional[str] = "UTC") -> _String:
+ return _String(
+ _DateTime(self, DateTimeStrftime(format=format, timezone=timezone)),
+ StringNoop(),
+ )
- @property
- def week(self) -> _Number:
- return self.parts(TimeUnit.WEEK)
+ def year(self, timezone: Optional[str] = "UTC") -> _Number:
+ return self.parts(TimeUnit.YEAR, timezone)
- @property
- def day(self) -> _Number:
- return self.parts(TimeUnit.DAY)
+ def month(self, timezone: Optional[str] = "UTC") -> _Number:
+ return self.parts(TimeUnit.MONTH, timezone)
- @property
- def hour(self) -> _Number:
- return self.parts(TimeUnit.HOUR)
+ def week(self, timezone: Optional[str] = "UTC") -> _Number:
+ return self.parts(TimeUnit.WEEK, timezone)
- @property
- def minute(self) -> _Number:
- return self.parts(TimeUnit.MINUTE)
+ def day(self, timezone: Optional[str] = "UTC") -> _Number:
+ return self.parts(TimeUnit.DAY, timezone)
- @property
- def second(self) -> _Number:
- return self.parts(TimeUnit.SECOND)
+ def hour(self, timezone: Optional[str] = "UTC") -> _Number:
+ return self.parts(TimeUnit.HOUR, timezone)
- @property
- def millisecond(self) -> _Number:
- return self.parts(TimeUnit.MILLISECOND)
+ def minute(self, timezone: Optional[str] = "UTC") -> _Number:
+ return self.parts(TimeUnit.MINUTE, timezone)
- @property
- def microsecond(self) -> _Number:
- return self.parts(TimeUnit.MICROSECOND)
+ def second(self, timezone: Optional[str] = "UTC") -> _Number:
+ return self.parts(TimeUnit.SECOND, timezone)
#########################################################
@@ -792,6 +804,42 @@ class ListLen(ListOp):
pass
+class ListSum(ListOp):
+ pass
+
+
+class ListMin(ListOp):
+ pass
+
+
+class ListMax(ListOp):
+ pass
+
+
+class ListAll(ListOp):
+ pass
+
+
+class ListAny(ListOp):
+ pass
+
+
+class ListMean(ListOp):
+ pass
+
+
+@dataclass
+class ListFilter(ListOp):
+ var: str
+ predicate: Expr
+
+
+@dataclass
+class ListMap(ListOp):
+ var: str
+ expr: Expr
+
+
@dataclass
class ListContains(ListOp):
item: Expr
@@ -830,6 +878,30 @@ def at(self, index: Expr) -> Expr:
def hasnull(self) -> _Bool:
return _Bool(_List(self, ListHasNull()))
+ def sum(self) -> _Number:
+ return _Number(_List(self, ListSum()), MathNoop())
+
+ def mean(self) -> _Number:
+ return _Number(_List(self, ListMean()), MathNoop())
+
+ def min(self) -> _Number:
+ return _Number(_List(self, ListMin()), MathNoop())
+
+ def max(self) -> _Number:
+ return _Number(_List(self, ListMax()), MathNoop())
+
+ def all(self) -> _Bool:
+ return _Bool(_List(self, ListAll()))
+
+ def any(self) -> _Bool:
+ return _Bool(_List(self, ListAny()))
+
+ def filter(self, var: str, predicate: Expr) -> _List:
+ return _List(self, ListFilter(var=var, predicate=predicate))
+
+ def map(self, var: str, expr: Expr) -> _List:
+ return _List(self, ListMap(var=var, expr=expr))
+
#######################################################
@@ -961,7 +1033,7 @@ def __init__(self, col: str):
super(Ref, self).__init__()
def __str__(self) -> str:
- return f"col('{self._col}')"
+ return f'col("{self._col}")'
class IsNull(Expr):
@@ -1015,6 +1087,10 @@ def col(col: str) -> Expr:
return Ref(col)
+def var(var: str) -> Expr:
+ return Var(var)
+
+
def lit(v: Any, type: Optional[Type] = None) -> Expr:
# TODO: Add support for more types recursively
if type is not None:
@@ -1048,3 +1124,28 @@ def from_epoch(duration: Expr, unit: str | TimeUnit) -> _DateTime:
duration = make_expr(duration)
unit = TimeUnit.from_string(unit)
return _DateTime(DateTimeFromEpoch(duration, unit), DateTimeNoop())
+
+
+def datetime(
+ year: int,
+ month: int,
+ day: int,
+ hour: int = 0,
+ minute: int = 0,
+ second: int = 0,
+ microsecond: int = 0,
+ timezone: Optional[str] = "UTC",
+) -> _DateTime:
+ return _DateTime(
+ DateTimeLiteral(
+ year=year,
+ month=month,
+ day=day,
+ hour=hour,
+ minute=minute,
+ second=second,
+ microsecond=microsecond,
+ timezone=timezone,
+ ),
+ DateTimeNoop(),
+ )
diff --git a/fennel/expr/serializer.py b/fennel/expr/serializer.py
index 3ad6019b9..0673ff049 100644
--- a/fennel/expr/serializer.py
+++ b/fennel/expr/serializer.py
@@ -17,6 +17,14 @@
ListHasNull,
ListLen,
ListNoop,
+ ListSum,
+ ListMean,
+ ListMin,
+ ListMax,
+ ListAll,
+ ListAny,
+ ListFilter,
+ ListMap,
Literal,
Ref,
StructGet,
@@ -60,8 +68,8 @@
def time_unit_to_proto(unit: TimeUnit) -> proto.TimeUnit:
- if unit == TimeUnit.MILLISECOND:
- return proto.TimeUnit.MILLISECOND
+ if unit == TimeUnit.MICROSECOND:
+ return proto.TimeUnit.MICROSECOND
elif unit == TimeUnit.MILLISECOND:
return proto.TimeUnit.MILLISECOND
elif unit == TimeUnit.SECOND:
@@ -104,6 +112,11 @@ def visitRef(self, obj):
expr.ref.name = obj._col
return expr
+ def visitVar(self, obj):
+ expr = proto.Expr()
+ expr.var.name = obj.var
+ return expr
+
def visitUnary(self, obj):
expr = proto.Expr()
if obj.op == "~":
@@ -321,8 +334,14 @@ def visitDateTime(self, obj):
if isinstance(obj.op, DateTimeNoop):
return self.visit(obj.operand)
elif isinstance(obj.op, DateTimeParts):
- part = proto.Part()
- part.unit = time_unit_to_proto(obj.op.part)
+ part = proto.Part(
+ unit=time_unit_to_proto(obj.op.part),
+ timezone=(
+ proto.Timezone(timezone=obj.op.timezone)
+ if obj.op.timezone is not None
+ else None
+ ),
+ )
expr.datetime_fn.fn.CopyFrom(proto.DateTimeOp(part=part))
elif isinstance(obj.op, DateTimeSince):
expr.datetime_fn.fn.CopyFrom(
@@ -346,6 +365,11 @@ def visitDateTime(self, obj):
proto.DateTimeOp(
strftime=proto.Strftime(
format=obj.op.format,
+ timezone=(
+ proto.Timezone(timezone=obj.op.timezone)
+ if obj.op.timezone is not None
+ else None
+ ),
)
)
)
@@ -376,6 +400,35 @@ def visitList(self, obj):
expr.list_fn.fn.CopyFrom(proto.ListOp(len=proto.Len()))
elif isinstance(obj.op, ListHasNull):
expr.list_fn.fn.CopyFrom(proto.ListOp(has_null=proto.HasNull()))
+ elif isinstance(obj.op, ListSum):
+ expr.list_fn.fn.CopyFrom(proto.ListOp(sum=proto.ListSum()))
+ elif isinstance(obj.op, ListMean):
+ expr.list_fn.fn.CopyFrom(proto.ListOp(mean=proto.ListMean()))
+ elif isinstance(obj.op, ListMin):
+ expr.list_fn.fn.CopyFrom(proto.ListOp(min=proto.ListMin()))
+ elif isinstance(obj.op, ListMax):
+ expr.list_fn.fn.CopyFrom(proto.ListOp(max=proto.ListMax()))
+ elif isinstance(obj.op, ListAll):
+ expr.list_fn.fn.CopyFrom(proto.ListOp(all=proto.ListAll()))
+ elif isinstance(obj.op, ListAny):
+ expr.list_fn.fn.CopyFrom(proto.ListOp(any=proto.ListAny()))
+ elif isinstance(obj.op, ListFilter):
+ expr.list_fn.fn.CopyFrom(
+ proto.ListOp(
+ filter=proto.ListFilter(
+ var=obj.op.var, predicate=self.visit(obj.op.predicate)
+ )
+ )
+ )
+ elif isinstance(obj.op, ListMap):
+ expr.list_fn.fn.CopyFrom(
+ proto.ListOp(
+ map=proto.ListMap(
+ var=obj.op.var, map_expr=self.visit(obj.op.expr)
+ )
+ )
+ )
+
expr.list_fn.list.CopyFrom(self.visit(obj.expr))
return expr
@@ -415,6 +468,23 @@ def visitDateTimeFromEpoch(self, obj):
expr.from_epoch.CopyFrom(from_epoch)
return expr
+ def visitDateTimeLiteral(self, obj):
+ expr = proto.Expr()
+ datetime_literal = proto.DatetimeLiteral()
+ datetime_literal.year = obj.year
+ datetime_literal.month = obj.month
+ datetime_literal.day = obj.day
+ datetime_literal.hour = obj.hour
+ datetime_literal.minute = obj.minute
+ datetime_literal.second = obj.second
+ datetime_literal.microsecond = obj.microsecond
+ if obj.timezone is not None:
+ datetime_literal.timezone.CopyFrom(
+ proto.Timezone(timezone=obj.timezone)
+ )
+ expr.datetime_literal.CopyFrom(datetime_literal)
+ return expr
+
def val_as_json(val: Any) -> str:
if isinstance(val, str):
diff --git a/fennel/expr/test_expr.py b/fennel/expr/test_expr.py
index 9fa7b6e78..1fe688379 100644
--- a/fennel/expr/test_expr.py
+++ b/fennel/expr/test_expr.py
@@ -62,7 +62,7 @@ def test_unary_expr():
def test_basic_expr2():
expr = col("a") + col("b") + 3
printer = ExprPrinter()
- expected = "((col('a') + col('b')) + 3)"
+ expected = '((col("a") + col("b")) + 3)'
assert expected == printer.print(expr.root)
serializer = ExprSerializer()
proto_expr = serializer.serialize(expr.root)
@@ -102,7 +102,7 @@ class TestDataset:
def test_math_expr():
expr = (col("a").num.floor() + 3.2).num.ceil()
printer = ExprPrinter()
- expected = "CEIL((FLOOR(col('a')) + 3.2))"
+ expected = 'CEIL((FLOOR(col("a")) + 3.2))'
assert expected == printer.print(expr.root)
serializer = ExprSerializer()
proto_expr = serializer.serialize(expr.root)
@@ -164,7 +164,7 @@ def test_math_expr():
def test_bool_expr():
expr = (col("a") == 5) | ((col("b") == "random") & (col("c") == 3.2))
printer = ExprPrinter()
- expected = """((col('a') == 5) or ((col('b') == "random") and (col('c') == 3.2)))"""
+ expected = """((col("a") == 5) or ((col("b") == "random") and (col("c") == 3.2)))"""
assert expected == printer.print(expr.root)
df = pd.DataFrame(
@@ -186,7 +186,7 @@ def test_bool_expr():
def test_str_expr():
expr = (col("a").str.concat(col("b"))).str.lower().len().ceil()
printer = ExprPrinter()
- expected = "CEIL(LEN(LOWER(col('a') + col('b'))))"
+ expected = 'CEIL(LEN(LOWER(col("a") + col("b"))))'
assert expected == printer.print(expr.root)
ref_extractor = FetchReferences()
ref_extractor.visit(expr.root)
@@ -199,7 +199,7 @@ def test_str_expr():
.then(col("b"))
.otherwise("No Match")
)
- expected = """WHEN CONTAINS(UPPER(col('a') + col('b')), col('c')) THEN col('b') ELSE "No Match\""""
+ expected = """WHEN CONTAINS(UPPER(col("a") + col("b")), col("c")) THEN col("b") ELSE "No Match\""""
assert expected == printer.print(expr.root)
ref_extractor = FetchReferences()
assert ref_extractor.fetch(expr.root) == {"a", "b", "c"}
@@ -232,7 +232,7 @@ def test_str_expr():
.then(col("c"))
.otherwise("No Match")
)
- expected = """WHEN CONTAINS(col('a'), "p") THEN col('b') WHEN CONTAINS(col('b'), "b") THEN col('a') WHEN CONTAINS(col('c'), "C") THEN col('c') ELSE "No Match\""""
+ expected = """WHEN CONTAINS(col("a"), "p") THEN col("b") WHEN CONTAINS(col("b"), "b") THEN col("a") WHEN CONTAINS(col("c"), "C") THEN col("c") ELSE "No Match\""""
assert expected == printer.print(expr.root)
serializer = ExprSerializer()
proto_expr = serializer.serialize(expr.root)
@@ -322,7 +322,7 @@ def test_dict_op():
).dict.len()
printer = ExprPrinter()
expected = (
- """(CEIL((col('a').get("x") + col('a').get("y"))) + LEN(col('a')))"""
+ """(CEIL((col("a").get("x") + col("a").get("y"))) + LEN(col("a")))"""
)
ref_extractor = FetchReferences()
ref_extractor.visit(expr.root)
@@ -476,7 +476,7 @@ def test_datetime_expr():
{"a": ["2021-01-01", "2021-01-02", "2021-01-03", "2021-01-04"]}
),
schema={"a": str},
- display="STRPTIME(col('a'), %Y-%m-%d, UTC)",
+ display='STRPTIME(col("a"), %Y-%m-%d, UTC)',
refs={"a"},
eval_result=[
pd.Timestamp("2021-01-01 00:00:00+0000", tz="UTC"),
@@ -494,7 +494,7 @@ def test_datetime_expr():
{"a": ["2021-01-01", "2021-01-02", "2021-01-03", "2021-01-04"]}
),
schema={"a": str},
- display="STRPTIME(col('a'), %Y-%m-%d, America/New_York)",
+ display='STRPTIME(col("a"), %Y-%m-%d, America/New_York)',
refs={"a"},
eval_result=[
pd.Timestamp("2021-01-01 05:00:00+0000", tz="UTC"),
@@ -542,13 +542,12 @@ def test_parse():
expr=(col("a").str.parse(int)),
df=pd.DataFrame({"a": ["1", "2", "3", "4"]}),
schema={"a": str},
- display="PARSE(col('a'), )",
+ display="PARSE(col(\"a\"), )",
refs={"a"},
eval_result=[1, 2, 3, 4],
expected_dtype=int,
proto_json=None,
),
- # Parse a struct
ExprTestCase(
expr=(col("a").str.parse(A)),
df=pd.DataFrame(
@@ -560,7 +559,7 @@ def test_parse():
}
),
schema={"a": str},
- display="PARSE(col('a'), )",
+ display="PARSE(col(\"a\"), )",
refs={"a"},
eval_result=[A(1, 2, "a"), A(2, 3, "b")],
expected_dtype=A,
@@ -571,7 +570,7 @@ def test_parse():
expr=(col("a").str.parse(List[int])),
df=pd.DataFrame({"a": ["[1, 2, 3]", "[4, 5, 6]"]}),
schema={"a": str},
- display="PARSE(col('a'), typing.List[int])",
+ display='PARSE(col("a"), typing.List[int])',
refs={"a"},
eval_result=[[1, 2, 3], [4, 5, 6]],
expected_dtype=List[int],
@@ -588,7 +587,7 @@ def test_parse():
}
),
schema={"a": str},
- display="PARSE(col('a'), )",
+ display="PARSE(col(\"a\"), )",
refs={"a"},
eval_result=[Nested(A(1, 2, "a"), B(1, "b"), [1, 2, 3])],
expected_dtype=Nested,
@@ -599,7 +598,7 @@ def test_parse():
expr=(col("a").str.parse(float)),
df=pd.DataFrame({"a": ["1.1", "2.2", "3.3", "4.4"]}),
schema={"a": str},
- display="PARSE(col('a'), )",
+ display="PARSE(col(\"a\"), )",
refs={"a"},
eval_result=[1.1, 2.2, 3.3, 4.4],
expected_dtype=float,
@@ -610,7 +609,7 @@ def test_parse():
expr=(col("a").str.parse(bool)),
df=pd.DataFrame({"a": ["true", "false", "true", "false"]}),
schema={"a": str},
- display="PARSE(col('a'), )",
+ display="PARSE(col(\"a\"), )",
refs={"a"},
eval_result=[True, False, True, False],
expected_dtype=bool,
@@ -621,7 +620,7 @@ def test_parse():
expr=(col("a").str.parse(str)),
df=pd.DataFrame({"a": ['"a1"', '"b"', '"c"', '"d"']}),
schema={"a": str},
- display="PARSE(col('a'), )",
+ display="PARSE(col(\"a\"), )",
refs={"a"},
eval_result=["a1", "b", "c", "d"],
expected_dtype=str,
@@ -713,7 +712,6 @@ def test_parse():
proto_json={},
),
]
-
for case in cases:
check_test_case(case)
@@ -724,7 +722,7 @@ def test_list():
expr=(col("a").list.at(0)),
df=pd.DataFrame({"a": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}),
schema={"a": List[int]},
- display="col('a')[0]",
+ display='col("a")[0]',
refs={"a"},
eval_result=[1, 4, 7],
expected_dtype=Optional[int],
@@ -745,7 +743,7 @@ def test_list():
}
),
schema={"a": List[int], "b": int, "c": int},
- display="col('a')[(col('b') + col('c'))]",
+ display='col("a")[(col("b") + col("c"))]',
refs={"a", "b", "c"},
eval_result=[2, 12, 9],
expected_dtype=Optional[int],
@@ -761,7 +759,7 @@ def test_list():
}
),
schema={"a": List[int], "b": int},
- display="col('a')[col('b')]",
+ display='col("a")[col("b")]',
refs={"a", "b"},
eval_result=[1, pd.NA, pd.NA],
expected_dtype=Optional[int],
@@ -772,7 +770,7 @@ def test_list():
expr=(~col("a").list.contains(3)),
df=pd.DataFrame({"a": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}),
schema={"a": List[int]},
- display="~(CONTAINS(col('a'), 3))",
+ display='~(CONTAINS(col("a"), 3))',
refs={"a"},
eval_result=[False, True, True],
expected_dtype=bool,
@@ -789,7 +787,7 @@ def test_list():
}
),
schema={"a": List[int], "b": int, "c": int},
- display="CONTAINS(col('a'), (col('b') * col('c')))",
+ display='CONTAINS(col("a"), (col("b") * col("c")))',
refs={"a", "b", "c"},
eval_result=[True, True, False],
expected_dtype=bool,
@@ -810,7 +808,7 @@ def test_list():
}
),
schema={"a2": List[str], "b2": str},
- display="""CONTAINS(col('a2'), col('b2'))""",
+ display="""CONTAINS(col("a2"), col("b2"))""",
refs={"a2", "b2"},
eval_result=[True, True, False, False],
expected_dtype=bool,
@@ -827,7 +825,7 @@ def test_list():
{"a": [[A(1, 2, "a"), A(2, 3, "b"), A(4, 5, "c")]]}
),
schema={"a": List[A]},
- display="""CONTAINS(col('a'), STRUCT(x=1, y=2, z="a"))""",
+ display="""CONTAINS(col("a"), STRUCT(x=1, y=2, z="a"))""",
refs={"a"},
eval_result=[True],
expected_dtype=bool,
@@ -839,7 +837,7 @@ def test_list():
{"a": [[A(1, 2, "a"), A(2, 3, "b"), A(4, 5, "c")]]}
),
schema={"a": List[A]},
- display="LEN(col('a'))",
+ display='LEN(col("a"))',
refs={"a"},
eval_result=[3],
expected_dtype=int,
@@ -850,7 +848,7 @@ def test_list():
expr=(col("a").list.len()),
df=pd.DataFrame({"a": [[1, 2, 3], [4, 5, 6, 12], [7, 8, 9, 19]]}),
schema={"a": List[int]},
- display="LEN(col('a'))",
+ display='LEN(col("a"))',
refs={"a"},
eval_result=[3, 4, 4],
expected_dtype=int,
@@ -861,7 +859,7 @@ def test_list():
expr=(col("a").list.len()),
df=pd.DataFrame({"a": [[], [4, 5, 6, 12], [7, 8, 9, 19]]}),
schema={"a": List[int]},
- display="LEN(col('a'))",
+ display='LEN(col("a"))',
refs={"a"},
eval_result=[0, 4, 4],
expected_dtype=int,
@@ -880,7 +878,7 @@ def test_struct():
expr=(col("a").struct.get("x")),
df=pd.DataFrame({"a": [A(1, 2, "a"), A(2, 3, "b"), A(4, 5, "c")]}),
schema={"a": A},
- display="col('a').x",
+ display='col("a").x',
refs={"a"},
eval_result=[1, 2, 4],
expected_dtype=int,
@@ -890,7 +888,7 @@ def test_struct():
expr=(col("a").struct.get("x") + col("a").struct.get("y")),
df=pd.DataFrame({"a": [A(1, 2, "a"), A(2, 3, "b"), A(4, 5, "c")]}),
schema={"a": A},
- display="(col('a').x + col('a').y)",
+ display='(col("a").x + col("a").y)',
refs={"a"},
eval_result=[3, 5, 9],
expected_dtype=int,
@@ -906,7 +904,7 @@ def test_datetime():
cases = [
# Extract year from a datetime
ExprTestCase(
- expr=(col("a").dt.year),
+ expr=(col("a").dt.year()),
df=pd.DataFrame(
{
"a": [
@@ -917,7 +915,7 @@ def test_datetime():
}
),
schema={"a": datetime},
- display="DATEPART(col('a'), TimeUnit.YEAR)",
+ display='DATEPART(col("a"), TimeUnit.YEAR)',
refs={"a"},
eval_result=[2021, 2021, 2021],
expected_dtype=int,
@@ -925,7 +923,7 @@ def test_datetime():
),
# Extract month from a datetime
ExprTestCase(
- expr=(col("a").dt.month),
+ expr=(col("a").dt.month()),
df=pd.DataFrame(
{
"a": [
@@ -936,7 +934,7 @@ def test_datetime():
}
),
schema={"a": datetime},
- display="DATEPART(col('a'), TimeUnit.MONTH)",
+ display='DATEPART(col("a"), TimeUnit.MONTH)',
refs={"a"},
eval_result=[1, 2, 3],
expected_dtype=int,
@@ -944,7 +942,7 @@ def test_datetime():
),
# Extract week from a datetime
ExprTestCase(
- expr=(col("a").dt.week),
+ expr=(col("a").dt.week()),
df=pd.DataFrame(
{
"a": [
@@ -955,7 +953,7 @@ def test_datetime():
}
),
schema={"a": datetime},
- display="DATEPART(col('a'), TimeUnit.WEEK)",
+ display='DATEPART(col("a"), TimeUnit.WEEK)',
refs={"a"},
eval_result=[53, 5, 9],
expected_dtype=int,
@@ -981,7 +979,7 @@ def test_datetime():
}
),
schema={"a": datetime},
- display="""SINCE(col('a'), STRPTIME("2021-01-01 00:01:00+0000", %Y-%m-%d %H:%M:%S%z, UTC), unit=TimeUnit.DAY)""",
+ display="""SINCE(col("a"), STRPTIME("2021-01-01 00:01:00+0000", %Y-%m-%d %H:%M:%S%z, UTC), unit=TimeUnit.DAY)""",
refs={"a"},
eval_result=[0, 32, 61],
expected_dtype=int,
@@ -1007,7 +1005,7 @@ def test_datetime():
}
),
schema={"a": datetime},
- display="""SINCE(col('a'), STRPTIME("2021-01-01 00:01:00+0000", %Y-%m-%d %H:%M:%S%z, UTC), unit=TimeUnit.YEAR)""",
+ display="""SINCE(col("a"), STRPTIME("2021-01-01 00:01:00+0000", %Y-%m-%d %H:%M:%S%z, UTC), unit=TimeUnit.YEAR)""",
refs={"a"},
eval_result=[0, 0, 5],
expected_dtype=int,
@@ -1026,7 +1024,7 @@ def test_datetime():
}
),
schema={"a": datetime},
- display="SINCE_EPOCH(col('a'), unit=TimeUnit.DAY)",
+ display='SINCE_EPOCH(col("a"), unit=TimeUnit.DAY)',
refs={"a"},
eval_result=[18628, 18660, 18689],
expected_dtype=int,
@@ -1045,7 +1043,7 @@ def test_datetime():
}
),
schema={"a": datetime},
- display="SINCE_EPOCH(col('a'), unit=TimeUnit.YEAR)",
+ display='SINCE_EPOCH(col("a"), unit=TimeUnit.YEAR)',
refs={"a"},
eval_result=[51, 51, 56],
expected_dtype=int,
@@ -1064,7 +1062,7 @@ def test_datetime():
}
),
schema={"a": datetime},
- display="STRFTIME(col('a'), %Y-%m-%d)",
+ display='STRFTIME(col("a"), %Y-%m-%d)',
refs={"a"},
eval_result=["2021-01-01", "2021-02-02", "2021-03-03"],
expected_dtype=str,
@@ -1083,7 +1081,7 @@ def test_datetime():
}
),
schema={"a": datetime},
- display="STRFTIME(col('a'), %Y-%m-%d %H:%M:%S)",
+ display='STRFTIME(col("a"), %Y-%m-%d %H:%M:%S)',
refs={"a"},
eval_result=[
"2021-01-01 00:01:00",
@@ -1135,7 +1133,7 @@ def test_make_struct():
),
df=pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}),
schema={"a": int, "b": int},
- display="""STRUCT(x=col('a'), y=(col('a') + col('b')), z="constant")""",
+ display="""STRUCT(x=col("a"), y=(col("a") + col("b")), z="constant")""",
refs={"a", "b"},
eval_result=[
A(1, 5, "constant"),
@@ -1177,7 +1175,7 @@ def test_make_struct():
"e": str,
"f": List[int],
},
- display="""STRUCT(a=STRUCT(x=col('a'), y=col('b'), z=col('c')), b=STRUCT(p=col('d'), q=col('e')), c=col('f'))""",
+ display="""STRUCT(a=STRUCT(x=col("a"), y=col("b"), z=col("c")), b=STRUCT(p=col("d"), q=col("e")), c=col("f"))""",
refs={"a", "b", "c", "d", "e", "f"},
eval_result=[
Nested(A(1, 4, "str_1"), B(10, "a"), [1, 2, 3]),
@@ -1200,7 +1198,7 @@ def test_from_epoch():
expr=(from_epoch(col("a"), unit="second")),
df=pd.DataFrame({"a": [1725321570, 1725321570]}),
schema={"a": int},
- display="""FROM_EPOCH(col('a'), unit=TimeUnit.SECOND)""",
+ display="""FROM_EPOCH(col("a"), unit=TimeUnit.SECOND)""",
refs={"a"},
eval_result=[
pd.Timestamp("2024-09-02 23:59:30+0000", tz="UTC"),
@@ -1214,7 +1212,7 @@ def test_from_epoch():
expr=(from_epoch(col("a") * col("b"), unit="millisecond")),
df=pd.DataFrame({"a": [1725321570, 1725321570], "b": [1000, 1000]}),
schema={"a": int, "b": int},
- display="""FROM_EPOCH((col('a') * col('b')), unit=TimeUnit.MILLISECOND)""",
+ display="""FROM_EPOCH((col("a") * col("b")), unit=TimeUnit.MILLISECOND)""",
refs=set(["a", "b"]),
eval_result=[
pd.Timestamp("2024-09-02 23:59:30+0000", tz="UTC"),
@@ -1235,7 +1233,7 @@ def test_fillnull():
expr=(col("a").fillnull(0)),
df=pd.DataFrame({"a": [1, 2, None, 4]}),
schema={"a": Optional[int]},
- display="FILL_NULL(col('a'), 0)",
+ display='FILL_NULL(col("a"), 0)',
refs={"a"},
eval_result=[1, 2, 0, 4],
expected_dtype=int,
@@ -1245,7 +1243,7 @@ def test_fillnull():
expr=(col("a").fillnull("missing")),
df=pd.DataFrame({"a": ["a", "b", None, "d"]}),
schema={"a": Optional[str]},
- display="FILL_NULL(col('a'), \"missing\")",
+ display='FILL_NULL(col("a"), "missing")',
refs={"a"},
eval_result=["a", "b", "missing", "d"],
expected_dtype=str,
@@ -1260,7 +1258,7 @@ def test_fillnull():
),
df=pd.DataFrame({"a": ["2021-01-01", None, "2021-01-03"]}),
schema={"a": Optional[str]},
- display="""FILL_NULL(STRPTIME(col('a'), %Y-%m-%d, UTC), STRPTIME("2021-01-01", %Y-%m-%d, UTC))""",
+ display="""FILL_NULL(STRPTIME(col("a"), %Y-%m-%d, UTC), STRPTIME("2021-01-01", %Y-%m-%d, UTC))""",
refs={"a"},
eval_result=[
pd.Timestamp("2021-01-01 00:00:00+0000", tz="UTC"),
@@ -1304,7 +1302,7 @@ def test_isnull():
expr=(col("a").isnull()),
df=pd.DataFrame({"a": [1, 2, None, 4]}),
schema={"a": Optional[int]},
- display="IS_NULL(col('a'))",
+ display='IS_NULL(col("a"))',
refs={"a"},
eval_result=[False, False, True, False],
expected_dtype=bool,
@@ -1314,7 +1312,7 @@ def test_isnull():
expr=(col("a").isnull()),
df=pd.DataFrame({"a": ["a", "b", None, "d"]}),
schema={"a": Optional[str]},
- display="IS_NULL(col('a'))",
+ display='IS_NULL(col("a"))',
refs={"a"},
eval_result=[False, False, True, False],
expected_dtype=bool,
@@ -1325,7 +1323,7 @@ def test_isnull():
expr=(col("a").isnull()),
df=pd.DataFrame({"a": [A(1, 2, "a"), A(2, 3, "b"), None]}),
schema={"a": Optional[A]},
- display="IS_NULL(col('a'))",
+ display='IS_NULL(col("a"))',
refs={"a"},
eval_result=[False, False, True],
expected_dtype=bool,
@@ -1336,7 +1334,7 @@ def test_isnull():
expr=(col("a").isnull()),
df=pd.DataFrame({"a": [[1, 2, 3], [4, 5, 6], None]}),
schema={"a": Optional[List[int]]},
- display="IS_NULL(col('a'))",
+ display='IS_NULL(col("a"))',
refs={"a"},
eval_result=[False, False, True],
expected_dtype=bool,
diff --git a/fennel/expr/test_invalid_expr.py b/fennel/expr/test_invalid_expr.py
index 06904138b..a26f339dc 100644
--- a/fennel/expr/test_invalid_expr.py
+++ b/fennel/expr/test_invalid_expr.py
@@ -27,8 +27,8 @@ def test_invalid_datetime():
expr.eval(df, {"a": str})
assert (
- str(e.value)
- == "Failed to compile expression: invalid timezone: America/NonYork"
+ "Failed to compile expression: invalid timezone: `America/NonYork`"
+ in str(e.value)
)
df = pd.DataFrame(
@@ -45,7 +45,7 @@ def test_invalid_datetime():
expr.eval(df, {"a": str})
assert (
str(e.value)
- == 'Failed to evaluate expression: failed to eval expression: col(a).str.parse_datetime("%Y-%m-%d", timezone=""America/New_York""), error: invalid operation: conversion from `str` to `datetime[μs, America/New_York]` failed in column \'a\' for 3 out of 3 values: ["1", "2", "3"]'
+ == 'Failed to evaluate expression: failed to eval expression: col("a").str.parse_datetime("%Y-%m-%d", timezone="America/New_York"), error: invalid operation: conversion from `str` to `datetime[μs, America/New_York]` failed in column \'a\' for 3 out of 3 values: ["1", "2", "3"]'
)
with pytest.raises(ValueError) as e:
@@ -64,7 +64,7 @@ def test_missing_then():
df = pd.DataFrame({"a": [1, 2, 3]})
with pytest.raises(InvalidExprException) as e:
expr.eval(df, {"a": int})
- assert str(e.value) == "THEN clause missing for WHEN clause col('a') == 1"
+ assert str(e.value) == 'THEN clause missing for WHEN clause col("a") == 1'
with pytest.raises(AttributeError) as e:
expr = when(col("a") == 1).when(col("a") == 2)
@@ -88,7 +88,7 @@ def test_struct():
assert (
str(e.value)
- == "invalid field access for struct, expected string but got col('b')"
+ == 'invalid field access for struct, expected string but got col("b")'
)
diff --git a/fennel/expr/visitor.py b/fennel/expr/visitor.py
index 8affab86f..5635fe7cd 100644
--- a/fennel/expr/visitor.py
+++ b/fennel/expr/visitor.py
@@ -4,6 +4,7 @@
DateTimeFromEpoch,
DateTimeParts,
DateTimeSince,
+ DateTimeLiteral,
DateTimeSinceEpoch,
DateTimeStrftime,
ListContains,
@@ -22,6 +23,7 @@
Otherwise,
Binary,
IsNull,
+ Var,
FillNull,
_Bool,
_Dict,
@@ -60,6 +62,12 @@ def visit(self, obj):
elif isinstance(obj, Ref):
ret = self.visitRef(obj)
+ elif isinstance(obj, Var):
+ ret = self.visitVar(obj)
+
+ elif isinstance(obj, DateTimeLiteral):
+ ret = self.visitDateTimeLiteral(obj)
+
elif isinstance(obj, Unary):
ret = self.visitUnary(obj)
@@ -115,6 +123,9 @@ def visit(self, obj):
def visitLiteral(self, obj):
raise NotImplementedError
+ def visitVar(self, obj):
+ raise NotImplementedError
+
def visitRef(self, obj):
raise NotImplementedError
@@ -166,6 +177,9 @@ def visitMakeStruct(self, obj):
def visitDateTimeFromEpoch(self, obj):
raise NotImplementedError
+ def visitDateTimeLiteral(self, obj):
+ raise NotImplementedError
+
class ExprPrinter(Visitor):
@@ -178,6 +192,9 @@ def visitLiteral(self, obj):
def visitRef(self, obj):
return str(obj)
+ def visitVar(self, obj):
+ return str(obj)
+
def visitUnary(self, obj):
return "%s(%s)" % (obj.op, self.visit(obj.operand))
@@ -322,6 +339,9 @@ def visitMakeStruct(self, obj):
def visitDateTimeFromEpoch(self, obj):
return f"FROM_EPOCH({self.visit(obj.duration)}, unit={obj.unit})"
+ def visitDateTimeLiteral(self, obj):
+ return f"DATETIME({obj.year}, {obj.month}, {obj.day}, {obj.hour}, {obj.minute}, {obj.second}, {obj.microsecond}, timezone={obj.timezone})"
+
class FetchReferences(Visitor):
@@ -419,3 +439,6 @@ def visitMakeStruct(self, obj):
def visitDateTimeFromEpoch(self, obj):
self.visit(obj.duration)
+
+ def visitDateTimeLiteral(self, obj):
+ pass
diff --git a/fennel/featuresets/test_invalid_featureset.py b/fennel/featuresets/test_invalid_featureset.py
index 13bd08e66..a0bf9163f 100644
--- a/fennel/featuresets/test_invalid_featureset.py
+++ b/fennel/featuresets/test_invalid_featureset.py
@@ -420,7 +420,7 @@ class UserInfo4:
assert (
str(e.value)
- == "error in expression based extractor 'col('age') * col('age')'; can not set default value for expressions, maybe use fillnull instead?"
+ == 'error in expression based extractor \'col("age") * col("age")\'; can not set default value for expressions, maybe use fillnull instead?'
)
# Incorrect type for an expression feature
@@ -436,7 +436,7 @@ class UserInfo5:
assert (
str(e.value)
- == "expression 'col('age') * col('age')' for feature 'age_squared' is of type 'str' not 'int'"
+ == "expression 'col(\"age\") * col(\"age\")' for feature 'age_squared' is of type 'str' not 'int'"
)
# Using dataset field in expression feature
diff --git a/fennel/gen/dataset_pb2.py b/fennel/gen/dataset_pb2.py
index 0c5a15da0..30befc88d 100644
--- a/fennel/gen/dataset_pb2.py
+++ b/fennel/gen/dataset_pb2.py
@@ -20,7 +20,7 @@
import fennel.gen.expr_pb2 as expr__pb2
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rdataset.proto\x12\x14\x66\x65nnel.proto.dataset\x1a\x1egoogle/protobuf/duration.proto\x1a\x0emetadata.proto\x1a\x0cpycode.proto\x1a\x0cschema.proto\x1a\nspec.proto\x1a\x0cwindow.proto\x1a\nexpr.proto\"\xe5\x03\n\x0b\x43oreDataset\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x31\n\x08metadata\x18\x02 \x01(\x0b\x32\x1f.fennel.proto.metadata.Metadata\x12/\n\x08\x64sschema\x18\x03 \x01(\x0b\x32\x1d.fennel.proto.schema.DSSchema\x12*\n\x07history\x18\x04 \x01(\x0b\x32\x19.google.protobuf.Duration\x12,\n\tretention\x18\x05 \x01(\x0b\x32\x19.google.protobuf.Duration\x12L\n\x0e\x66ield_metadata\x18\x06 \x03(\x0b\x32\x34.fennel.proto.dataset.CoreDataset.FieldMetadataEntry\x12+\n\x06pycode\x18\x07 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\x12\x19\n\x11is_source_dataset\x18\x08 \x01(\x08\x12\x0f\n\x07version\x18\t \x01(\r\x12\x0c\n\x04tags\x18\n \x03(\t\x1aU\n\x12\x46ieldMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12.\n\x05value\x18\x02 \x01(\x0b\x32\x1f.fennel.proto.metadata.Metadata:\x02\x38\x01\"Q\n\x08OnDemand\x12\x1c\n\x14\x66unction_source_code\x18\x01 \x01(\t\x12\x10\n\x08\x66unction\x18\x02 \x01(\x0c\x12\x15\n\rexpires_after\x18\x03 \x01(\x03\"\xd2\x01\n\x08Pipeline\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0c\x64\x61taset_name\x18\x02 \x01(\t\x12\x11\n\tsignature\x18\x03 \x01(\t\x12\x31\n\x08metadata\x18\x04 \x01(\x0b\x32\x1f.fennel.proto.metadata.Metadata\x12\x1b\n\x13input_dataset_names\x18\x05 \x03(\t\x12\x12\n\nds_version\x18\x06 \x01(\r\x12+\n\x06pycode\x18\x07 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\"\x8f\x08\n\x08Operator\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0f\n\x07is_root\x18\x02 \x01(\x08\x12\x15\n\rpipeline_name\x18\x03 \x01(\t\x12\x14\n\x0c\x64\x61taset_name\x18\x04 \x01(\t\x12\x12\n\nds_version\x18\x14 \x01(\r\x12\x34\n\taggregate\x18\x05 \x01(\x0b\x32\x1f.fennel.proto.dataset.AggregateH\x00\x12*\n\x04join\x18\x06 
\x01(\x0b\x32\x1a.fennel.proto.dataset.JoinH\x00\x12\x34\n\ttransform\x18\x07 \x01(\x0b\x32\x1f.fennel.proto.dataset.TransformH\x00\x12,\n\x05union\x18\x08 \x01(\x0b\x32\x1b.fennel.proto.dataset.UnionH\x00\x12.\n\x06\x66ilter\x18\t \x01(\x0b\x32\x1c.fennel.proto.dataset.FilterH\x00\x12\x37\n\x0b\x64\x61taset_ref\x18\n \x01(\x0b\x32 .fennel.proto.dataset.DatasetRefH\x00\x12.\n\x06rename\x18\x0c \x01(\x0b\x32\x1c.fennel.proto.dataset.RenameH\x00\x12*\n\x04\x64rop\x18\r \x01(\x0b\x32\x1a.fennel.proto.dataset.DropH\x00\x12\x30\n\x07\x65xplode\x18\x0e \x01(\x0b\x32\x1d.fennel.proto.dataset.ExplodeH\x00\x12,\n\x05\x64\x65\x64up\x18\x0f \x01(\x0b\x32\x1b.fennel.proto.dataset.DedupH\x00\x12,\n\x05\x66irst\x18\x10 \x01(\x0b\x32\x1b.fennel.proto.dataset.FirstH\x00\x12.\n\x06\x61ssign\x18\x11 \x01(\x0b\x32\x1c.fennel.proto.dataset.AssignH\x00\x12\x32\n\x08\x64ropnull\x18\x12 \x01(\x0b\x32\x1e.fennel.proto.dataset.DropnullH\x00\x12:\n\x06window\x18\x13 \x01(\x0b\x32(.fennel.proto.dataset.WindowOperatorKindH\x00\x12.\n\x06latest\x18\x15 \x01(\x0b\x32\x1c.fennel.proto.dataset.LatestH\x00\x12\x34\n\tchangelog\x18\x16 \x01(\x0b\x32\x1f.fennel.proto.dataset.ChangelogH\x00\x12\x37\n\x0b\x61ssign_expr\x18\x17 \x01(\x0b\x32 .fennel.proto.dataset.AssignExprH\x00\x12\x37\n\x0b\x66ilter_expr\x18\x18 \x01(\x0b\x32 .fennel.proto.dataset.FilterExprH\x00\x12\x0c\n\x04name\x18\x0b \x01(\tB\x06\n\x04kind\"\xc7\x01\n\tAggregate\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x0c\n\x04keys\x18\x02 \x03(\t\x12)\n\x05specs\x18\x03 \x03(\x0b\x32\x1a.fennel.proto.spec.PreSpec\x12\x12\n\x05\x61long\x18\x05 \x01(\tH\x00\x88\x01\x01\x12\x39\n\remit_strategy\x18\x06 \x01(\x0e\x32\".fennel.proto.dataset.EmitStrategy\x12\x14\n\x0coperand_name\x18\x04 \x01(\tB\x08\n\x06_along\"\xb9\x03\n\x04Join\x12\x16\n\x0elhs_operand_id\x18\x01 \x01(\t\x12\x1c\n\x14rhs_dsref_operand_id\x18\x02 \x01(\t\x12.\n\x02on\x18\x03 \x03(\x0b\x32\".fennel.proto.dataset.Join.OnEntry\x12\x32\n\nwithin_low\x18\x06 
\x01(\x0b\x32\x19.google.protobuf.DurationH\x00\x88\x01\x01\x12\x33\n\x0bwithin_high\x18\x07 \x01(\x0b\x32\x19.google.protobuf.DurationH\x01\x88\x01\x01\x12\x18\n\x10lhs_operand_name\x18\x04 \x01(\t\x12\x1e\n\x16rhs_dsref_operand_name\x18\x05 \x01(\t\x12+\n\x03how\x18\x08 \x01(\x0e\x32\x1e.fennel.proto.dataset.Join.How\x12\x15\n\tbroadcast\x18\t \x01(\x08\x42\x02\x18\x01\x1a)\n\x07OnEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x1a\n\x03How\x12\x08\n\x04Left\x10\x00\x12\t\n\x05Inner\x10\x01\x42\r\n\x0b_within_lowB\x0e\n\x0c_within_high\"\xed\x01\n\tTransform\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12;\n\x06schema\x18\x02 \x03(\x0b\x32+.fennel.proto.dataset.Transform.SchemaEntry\x12+\n\x06pycode\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\x12\x14\n\x0coperand_name\x18\x04 \x01(\t\x1aL\n\x0bSchemaEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12,\n\x05value\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType:\x02\x38\x01\"]\n\nFilterExpr\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12%\n\x04\x65xpr\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"_\n\x06\x46ilter\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12+\n\x06pycode\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"\xa8\x01\n\x06\x41ssign\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12+\n\x06pycode\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\x12\x13\n\x0b\x63olumn_name\x18\x03 \x01(\t\x12\x32\n\x0boutput_type\x18\x04 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType\x12\x14\n\x0coperand_name\x18\x05 \x01(\t\"\xd5\x02\n\nAssignExpr\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12:\n\x05\x65xprs\x18\x02 \x03(\x0b\x32+.fennel.proto.dataset.AssignExpr.ExprsEntry\x12G\n\x0coutput_types\x18\x03 \x03(\x0b\x32\x31.fennel.proto.dataset.AssignExpr.OutputTypesEntry\x12\x14\n\x0coperand_name\x18\x05 \x01(\t\x1a\x45\n\nExprsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12&\n\x05value\x18\x02 
\x01(\x0b\x32\x17.fennel.proto.expr.Expr:\x02\x38\x01\x1aQ\n\x10OutputTypesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12,\n\x05value\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType:\x02\x38\x01\"E\n\x08\x44ropnull\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x0f\n\x07\x63olumns\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"B\n\x04\x44rop\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x10\n\x08\x64ropcols\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"\xa5\x01\n\x06Rename\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12?\n\ncolumn_map\x18\x02 \x03(\x0b\x32+.fennel.proto.dataset.Rename.ColumnMapEntry\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\x1a\x30\n\x0e\x43olumnMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"3\n\x05Union\x12\x13\n\x0boperand_ids\x18\x01 \x03(\t\x12\x15\n\roperand_names\x18\x02 \x03(\t\"B\n\x05\x44\x65\x64up\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x0f\n\x07\x63olumns\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"D\n\x07\x45xplode\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x0f\n\x07\x63olumns\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"=\n\x05\x46irst\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\n\n\x02\x62y\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\">\n\x06Latest\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\n\n\x02\x62y\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"L\n\tChangelog\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x15\n\rdelete_column\x18\x02 \x01(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"\xcb\x01\n\x12WindowOperatorKind\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x30\n\x0bwindow_type\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.window.Window\x12\n\n\x02\x62y\x18\x03 \x03(\t\x12\r\n\x05\x66ield\x18\x04 \x01(\t\x12\x32\n\x07summary\x18\x06 \x01(\x0b\x32\x1c.fennel.proto.window.SummaryH\x00\x88\x01\x01\x12\x14\n\x0coperand_name\x18\x05 \x01(\tB\n\n\x08_summary\",\n\nDatasetRef\x12\x1e\n\x16referring_dataset_name\x18\x01 
\x01(\t\"\x80\x02\n\x08\x44\x61taflow\x12\x16\n\x0c\x64\x61taset_name\x18\x01 \x01(\tH\x00\x12L\n\x11pipeline_dataflow\x18\x02 \x01(\x0b\x32/.fennel.proto.dataset.Dataflow.PipelineDataflowH\x00\x12\x0c\n\x04tags\x18\x03 \x03(\t\x1ax\n\x10PipelineDataflow\x12\x14\n\x0c\x64\x61taset_name\x18\x01 \x01(\t\x12\x15\n\rpipeline_name\x18\x02 \x01(\t\x12\x37\n\x0finput_dataflows\x18\x03 \x03(\x0b\x32\x1e.fennel.proto.dataset.DataflowB\x06\n\x04kind\"\x9c\x01\n\x10PipelineLineages\x12\x14\n\x0c\x64\x61taset_name\x18\x01 \x01(\t\x12\x15\n\rpipeline_name\x18\x02 \x01(\t\x12=\n\x0einput_datasets\x18\x03 \x03(\x0b\x32%.fennel.proto.dataset.DatasetLineages\x12\x0e\n\x06\x61\x63tive\x18\x04 \x01(\x08\x12\x0c\n\x04tags\x18\x05 \x03(\t\"\\\n\x17\x44\x61tasetPipelineLineages\x12\x41\n\x11pipeline_lineages\x18\x02 \x03(\x0b\x32&.fennel.proto.dataset.PipelineLineages\"\x8b\x01\n\x0f\x44\x61tasetLineages\x12\x18\n\x0esource_dataset\x18\x01 \x01(\tH\x00\x12H\n\x0f\x64\x65rived_dataset\x18\x02 \x01(\x0b\x32-.fennel.proto.dataset.DatasetPipelineLineagesH\x00\x12\x0c\n\x04tags\x18\x03 \x03(\tB\x06\n\x04kind*$\n\x0c\x45mitStrategy\x12\t\n\x05\x45\x61ger\x10\x00\x12\t\n\x05\x46inal\x10\x01\x62\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rdataset.proto\x12\x14\x66\x65nnel.proto.dataset\x1a\x1egoogle/protobuf/duration.proto\x1a\x0emetadata.proto\x1a\x0cpycode.proto\x1a\x0cschema.proto\x1a\nspec.proto\x1a\x0cwindow.proto\x1a\nexpr.proto\"\xe5\x03\n\x0b\x43oreDataset\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x31\n\x08metadata\x18\x02 \x01(\x0b\x32\x1f.fennel.proto.metadata.Metadata\x12/\n\x08\x64sschema\x18\x03 \x01(\x0b\x32\x1d.fennel.proto.schema.DSSchema\x12*\n\x07history\x18\x04 \x01(\x0b\x32\x19.google.protobuf.Duration\x12,\n\tretention\x18\x05 \x01(\x0b\x32\x19.google.protobuf.Duration\x12L\n\x0e\x66ield_metadata\x18\x06 \x03(\x0b\x32\x34.fennel.proto.dataset.CoreDataset.FieldMetadataEntry\x12+\n\x06pycode\x18\x07 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\x12\x19\n\x11is_source_dataset\x18\x08 \x01(\x08\x12\x0f\n\x07version\x18\t \x01(\r\x12\x0c\n\x04tags\x18\n \x03(\t\x1aU\n\x12\x46ieldMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12.\n\x05value\x18\x02 \x01(\x0b\x32\x1f.fennel.proto.metadata.Metadata:\x02\x38\x01\"Q\n\x08OnDemand\x12\x1c\n\x14\x66unction_source_code\x18\x01 \x01(\t\x12\x10\n\x08\x66unction\x18\x02 \x01(\x0c\x12\x15\n\rexpires_after\x18\x03 \x01(\x03\"\xd2\x01\n\x08Pipeline\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0c\x64\x61taset_name\x18\x02 \x01(\t\x12\x11\n\tsignature\x18\x03 \x01(\t\x12\x31\n\x08metadata\x18\x04 \x01(\x0b\x32\x1f.fennel.proto.metadata.Metadata\x12\x1b\n\x13input_dataset_names\x18\x05 \x03(\t\x12\x12\n\nds_version\x18\x06 \x01(\r\x12+\n\x06pycode\x18\x07 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\"\x8f\x08\n\x08Operator\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0f\n\x07is_root\x18\x02 \x01(\x08\x12\x15\n\rpipeline_name\x18\x03 \x01(\t\x12\x14\n\x0c\x64\x61taset_name\x18\x04 \x01(\t\x12\x12\n\nds_version\x18\x14 \x01(\r\x12\x34\n\taggregate\x18\x05 \x01(\x0b\x32\x1f.fennel.proto.dataset.AggregateH\x00\x12*\n\x04join\x18\x06 
\x01(\x0b\x32\x1a.fennel.proto.dataset.JoinH\x00\x12\x34\n\ttransform\x18\x07 \x01(\x0b\x32\x1f.fennel.proto.dataset.TransformH\x00\x12,\n\x05union\x18\x08 \x01(\x0b\x32\x1b.fennel.proto.dataset.UnionH\x00\x12.\n\x06\x66ilter\x18\t \x01(\x0b\x32\x1c.fennel.proto.dataset.FilterH\x00\x12\x37\n\x0b\x64\x61taset_ref\x18\n \x01(\x0b\x32 .fennel.proto.dataset.DatasetRefH\x00\x12.\n\x06rename\x18\x0c \x01(\x0b\x32\x1c.fennel.proto.dataset.RenameH\x00\x12*\n\x04\x64rop\x18\r \x01(\x0b\x32\x1a.fennel.proto.dataset.DropH\x00\x12\x30\n\x07\x65xplode\x18\x0e \x01(\x0b\x32\x1d.fennel.proto.dataset.ExplodeH\x00\x12,\n\x05\x64\x65\x64up\x18\x0f \x01(\x0b\x32\x1b.fennel.proto.dataset.DedupH\x00\x12,\n\x05\x66irst\x18\x10 \x01(\x0b\x32\x1b.fennel.proto.dataset.FirstH\x00\x12.\n\x06\x61ssign\x18\x11 \x01(\x0b\x32\x1c.fennel.proto.dataset.AssignH\x00\x12\x32\n\x08\x64ropnull\x18\x12 \x01(\x0b\x32\x1e.fennel.proto.dataset.DropnullH\x00\x12:\n\x06window\x18\x13 \x01(\x0b\x32(.fennel.proto.dataset.WindowOperatorKindH\x00\x12.\n\x06latest\x18\x15 \x01(\x0b\x32\x1c.fennel.proto.dataset.LatestH\x00\x12\x34\n\tchangelog\x18\x16 \x01(\x0b\x32\x1f.fennel.proto.dataset.ChangelogH\x00\x12\x37\n\x0b\x61ssign_expr\x18\x17 \x01(\x0b\x32 .fennel.proto.dataset.AssignExprH\x00\x12\x37\n\x0b\x66ilter_expr\x18\x18 \x01(\x0b\x32 .fennel.proto.dataset.FilterExprH\x00\x12\x0c\n\x04name\x18\x0b \x01(\tB\x06\n\x04kind\"\xc7\x01\n\tAggregate\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x0c\n\x04keys\x18\x02 \x03(\t\x12)\n\x05specs\x18\x03 \x03(\x0b\x32\x1a.fennel.proto.spec.PreSpec\x12\x12\n\x05\x61long\x18\x05 \x01(\tH\x00\x88\x01\x01\x12\x39\n\remit_strategy\x18\x06 \x01(\x0e\x32\".fennel.proto.dataset.EmitStrategy\x12\x14\n\x0coperand_name\x18\x04 \x01(\tB\x08\n\x06_along\"\xcd\x03\n\x04Join\x12\x16\n\x0elhs_operand_id\x18\x01 \x01(\t\x12\x1c\n\x14rhs_dsref_operand_id\x18\x02 \x01(\t\x12.\n\x02on\x18\x03 \x03(\x0b\x32\".fennel.proto.dataset.Join.OnEntry\x12\x32\n\nwithin_low\x18\x06 
\x01(\x0b\x32\x19.google.protobuf.DurationH\x00\x88\x01\x01\x12\x33\n\x0bwithin_high\x18\x07 \x01(\x0b\x32\x19.google.protobuf.DurationH\x01\x88\x01\x01\x12\x18\n\x10lhs_operand_name\x18\x04 \x01(\t\x12\x1e\n\x16rhs_dsref_operand_name\x18\x05 \x01(\t\x12+\n\x03how\x18\x08 \x01(\x0e\x32\x1e.fennel.proto.dataset.Join.How\x12\x15\n\tbroadcast\x18\t \x01(\x08\x42\x02\x18\x01\x12\x12\n\nrhs_fields\x18\n \x03(\t\x1a)\n\x07OnEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x1a\n\x03How\x12\x08\n\x04Left\x10\x00\x12\t\n\x05Inner\x10\x01\x42\r\n\x0b_within_lowB\x0e\n\x0c_within_high\"\xed\x01\n\tTransform\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12;\n\x06schema\x18\x02 \x03(\x0b\x32+.fennel.proto.dataset.Transform.SchemaEntry\x12+\n\x06pycode\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\x12\x14\n\x0coperand_name\x18\x04 \x01(\t\x1aL\n\x0bSchemaEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12,\n\x05value\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType:\x02\x38\x01\"]\n\nFilterExpr\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12%\n\x04\x65xpr\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"_\n\x06\x46ilter\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12+\n\x06pycode\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"\xa8\x01\n\x06\x41ssign\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12+\n\x06pycode\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.pycode.PyCode\x12\x13\n\x0b\x63olumn_name\x18\x03 \x01(\t\x12\x32\n\x0boutput_type\x18\x04 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType\x12\x14\n\x0coperand_name\x18\x05 \x01(\t\"\xd5\x02\n\nAssignExpr\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12:\n\x05\x65xprs\x18\x02 \x03(\x0b\x32+.fennel.proto.dataset.AssignExpr.ExprsEntry\x12G\n\x0coutput_types\x18\x03 \x03(\x0b\x32\x31.fennel.proto.dataset.AssignExpr.OutputTypesEntry\x12\x14\n\x0coperand_name\x18\x05 \x01(\t\x1a\x45\n\nExprsEntry\x12\x0b\n\x03key\x18\x01 
\x01(\t\x12&\n\x05value\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr:\x02\x38\x01\x1aQ\n\x10OutputTypesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12,\n\x05value\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType:\x02\x38\x01\"E\n\x08\x44ropnull\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x0f\n\x07\x63olumns\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"B\n\x04\x44rop\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x10\n\x08\x64ropcols\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"\xa5\x01\n\x06Rename\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12?\n\ncolumn_map\x18\x02 \x03(\x0b\x32+.fennel.proto.dataset.Rename.ColumnMapEntry\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\x1a\x30\n\x0e\x43olumnMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"3\n\x05Union\x12\x13\n\x0boperand_ids\x18\x01 \x03(\t\x12\x15\n\roperand_names\x18\x02 \x03(\t\"B\n\x05\x44\x65\x64up\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x0f\n\x07\x63olumns\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"D\n\x07\x45xplode\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x0f\n\x07\x63olumns\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"=\n\x05\x46irst\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\n\n\x02\x62y\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\">\n\x06Latest\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\n\n\x02\x62y\x18\x02 \x03(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"L\n\tChangelog\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x15\n\rdelete_column\x18\x02 \x01(\t\x12\x14\n\x0coperand_name\x18\x03 \x01(\t\"\xcb\x01\n\x12WindowOperatorKind\x12\x12\n\noperand_id\x18\x01 \x01(\t\x12\x30\n\x0bwindow_type\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.window.Window\x12\n\n\x02\x62y\x18\x03 \x03(\t\x12\r\n\x05\x66ield\x18\x04 \x01(\t\x12\x32\n\x07summary\x18\x06 \x01(\x0b\x32\x1c.fennel.proto.window.SummaryH\x00\x88\x01\x01\x12\x14\n\x0coperand_name\x18\x05 
\x01(\tB\n\n\x08_summary\",\n\nDatasetRef\x12\x1e\n\x16referring_dataset_name\x18\x01 \x01(\t\"\x80\x02\n\x08\x44\x61taflow\x12\x16\n\x0c\x64\x61taset_name\x18\x01 \x01(\tH\x00\x12L\n\x11pipeline_dataflow\x18\x02 \x01(\x0b\x32/.fennel.proto.dataset.Dataflow.PipelineDataflowH\x00\x12\x0c\n\x04tags\x18\x03 \x03(\t\x1ax\n\x10PipelineDataflow\x12\x14\n\x0c\x64\x61taset_name\x18\x01 \x01(\t\x12\x15\n\rpipeline_name\x18\x02 \x01(\t\x12\x37\n\x0finput_dataflows\x18\x03 \x03(\x0b\x32\x1e.fennel.proto.dataset.DataflowB\x06\n\x04kind\"\x9c\x01\n\x10PipelineLineages\x12\x14\n\x0c\x64\x61taset_name\x18\x01 \x01(\t\x12\x15\n\rpipeline_name\x18\x02 \x01(\t\x12=\n\x0einput_datasets\x18\x03 \x03(\x0b\x32%.fennel.proto.dataset.DatasetLineages\x12\x0e\n\x06\x61\x63tive\x18\x04 \x01(\x08\x12\x0c\n\x04tags\x18\x05 \x03(\t\"\\\n\x17\x44\x61tasetPipelineLineages\x12\x41\n\x11pipeline_lineages\x18\x02 \x03(\x0b\x32&.fennel.proto.dataset.PipelineLineages\"\x8b\x01\n\x0f\x44\x61tasetLineages\x12\x18\n\x0esource_dataset\x18\x01 \x01(\tH\x00\x12H\n\x0f\x64\x65rived_dataset\x18\x02 \x01(\x0b\x32-.fennel.proto.dataset.DatasetPipelineLineagesH\x00\x12\x0c\n\x04tags\x18\x03 \x03(\tB\x06\n\x04kind*$\n\x0c\x45mitStrategy\x12\t\n\x05\x45\x61ger\x10\x00\x12\t\n\x05\x46inal\x10\x01\x62\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -41,8 +41,8 @@
_globals['_ASSIGNEXPR_OUTPUTTYPESENTRY']._serialized_options = b'8\001'
_globals['_RENAME_COLUMNMAPENTRY']._options = None
_globals['_RENAME_COLUMNMAPENTRY']._serialized_options = b'8\001'
- _globals['_EMITSTRATEGY']._serialized_start=5181
- _globals['_EMITSTRATEGY']._serialized_end=5217
+ _globals['_EMITSTRATEGY']._serialized_start=5201
+ _globals['_EMITSTRATEGY']._serialized_end=5237
_globals['_COREDATASET']._serialized_start=154
_globals['_COREDATASET']._serialized_end=639
_globals['_COREDATASET_FIELDMETADATAENTRY']._serialized_start=554
@@ -56,59 +56,59 @@
_globals['_AGGREGATE']._serialized_start=1980
_globals['_AGGREGATE']._serialized_end=2179
_globals['_JOIN']._serialized_start=2182
- _globals['_JOIN']._serialized_end=2623
- _globals['_JOIN_ONENTRY']._serialized_start=2523
- _globals['_JOIN_ONENTRY']._serialized_end=2564
- _globals['_JOIN_HOW']._serialized_start=2566
- _globals['_JOIN_HOW']._serialized_end=2592
- _globals['_TRANSFORM']._serialized_start=2626
- _globals['_TRANSFORM']._serialized_end=2863
- _globals['_TRANSFORM_SCHEMAENTRY']._serialized_start=2787
- _globals['_TRANSFORM_SCHEMAENTRY']._serialized_end=2863
- _globals['_FILTEREXPR']._serialized_start=2865
- _globals['_FILTEREXPR']._serialized_end=2958
- _globals['_FILTER']._serialized_start=2960
- _globals['_FILTER']._serialized_end=3055
- _globals['_ASSIGN']._serialized_start=3058
- _globals['_ASSIGN']._serialized_end=3226
- _globals['_ASSIGNEXPR']._serialized_start=3229
- _globals['_ASSIGNEXPR']._serialized_end=3570
- _globals['_ASSIGNEXPR_EXPRSENTRY']._serialized_start=3418
- _globals['_ASSIGNEXPR_EXPRSENTRY']._serialized_end=3487
- _globals['_ASSIGNEXPR_OUTPUTTYPESENTRY']._serialized_start=3489
- _globals['_ASSIGNEXPR_OUTPUTTYPESENTRY']._serialized_end=3570
- _globals['_DROPNULL']._serialized_start=3572
- _globals['_DROPNULL']._serialized_end=3641
- _globals['_DROP']._serialized_start=3643
- _globals['_DROP']._serialized_end=3709
- _globals['_RENAME']._serialized_start=3712
- _globals['_RENAME']._serialized_end=3877
- _globals['_RENAME_COLUMNMAPENTRY']._serialized_start=3829
- _globals['_RENAME_COLUMNMAPENTRY']._serialized_end=3877
- _globals['_UNION']._serialized_start=3879
- _globals['_UNION']._serialized_end=3930
- _globals['_DEDUP']._serialized_start=3932
- _globals['_DEDUP']._serialized_end=3998
- _globals['_EXPLODE']._serialized_start=4000
- _globals['_EXPLODE']._serialized_end=4068
- _globals['_FIRST']._serialized_start=4070
- _globals['_FIRST']._serialized_end=4131
- _globals['_LATEST']._serialized_start=4133
- _globals['_LATEST']._serialized_end=4195
- _globals['_CHANGELOG']._serialized_start=4197
- _globals['_CHANGELOG']._serialized_end=4273
- _globals['_WINDOWOPERATORKIND']._serialized_start=4276
- _globals['_WINDOWOPERATORKIND']._serialized_end=4479
- _globals['_DATASETREF']._serialized_start=4481
- _globals['_DATASETREF']._serialized_end=4525
- _globals['_DATAFLOW']._serialized_start=4528
- _globals['_DATAFLOW']._serialized_end=4784
- _globals['_DATAFLOW_PIPELINEDATAFLOW']._serialized_start=4656
- _globals['_DATAFLOW_PIPELINEDATAFLOW']._serialized_end=4776
- _globals['_PIPELINELINEAGES']._serialized_start=4787
- _globals['_PIPELINELINEAGES']._serialized_end=4943
- _globals['_DATASETPIPELINELINEAGES']._serialized_start=4945
- _globals['_DATASETPIPELINELINEAGES']._serialized_end=5037
- _globals['_DATASETLINEAGES']._serialized_start=5040
- _globals['_DATASETLINEAGES']._serialized_end=5179
+ _globals['_JOIN']._serialized_end=2643
+ _globals['_JOIN_ONENTRY']._serialized_start=2543
+ _globals['_JOIN_ONENTRY']._serialized_end=2584
+ _globals['_JOIN_HOW']._serialized_start=2586
+ _globals['_JOIN_HOW']._serialized_end=2612
+ _globals['_TRANSFORM']._serialized_start=2646
+ _globals['_TRANSFORM']._serialized_end=2883
+ _globals['_TRANSFORM_SCHEMAENTRY']._serialized_start=2807
+ _globals['_TRANSFORM_SCHEMAENTRY']._serialized_end=2883
+ _globals['_FILTEREXPR']._serialized_start=2885
+ _globals['_FILTEREXPR']._serialized_end=2978
+ _globals['_FILTER']._serialized_start=2980
+ _globals['_FILTER']._serialized_end=3075
+ _globals['_ASSIGN']._serialized_start=3078
+ _globals['_ASSIGN']._serialized_end=3246
+ _globals['_ASSIGNEXPR']._serialized_start=3249
+ _globals['_ASSIGNEXPR']._serialized_end=3590
+ _globals['_ASSIGNEXPR_EXPRSENTRY']._serialized_start=3438
+ _globals['_ASSIGNEXPR_EXPRSENTRY']._serialized_end=3507
+ _globals['_ASSIGNEXPR_OUTPUTTYPESENTRY']._serialized_start=3509
+ _globals['_ASSIGNEXPR_OUTPUTTYPESENTRY']._serialized_end=3590
+ _globals['_DROPNULL']._serialized_start=3592
+ _globals['_DROPNULL']._serialized_end=3661
+ _globals['_DROP']._serialized_start=3663
+ _globals['_DROP']._serialized_end=3729
+ _globals['_RENAME']._serialized_start=3732
+ _globals['_RENAME']._serialized_end=3897
+ _globals['_RENAME_COLUMNMAPENTRY']._serialized_start=3849
+ _globals['_RENAME_COLUMNMAPENTRY']._serialized_end=3897
+ _globals['_UNION']._serialized_start=3899
+ _globals['_UNION']._serialized_end=3950
+ _globals['_DEDUP']._serialized_start=3952
+ _globals['_DEDUP']._serialized_end=4018
+ _globals['_EXPLODE']._serialized_start=4020
+ _globals['_EXPLODE']._serialized_end=4088
+ _globals['_FIRST']._serialized_start=4090
+ _globals['_FIRST']._serialized_end=4151
+ _globals['_LATEST']._serialized_start=4153
+ _globals['_LATEST']._serialized_end=4215
+ _globals['_CHANGELOG']._serialized_start=4217
+ _globals['_CHANGELOG']._serialized_end=4293
+ _globals['_WINDOWOPERATORKIND']._serialized_start=4296
+ _globals['_WINDOWOPERATORKIND']._serialized_end=4499
+ _globals['_DATASETREF']._serialized_start=4501
+ _globals['_DATASETREF']._serialized_end=4545
+ _globals['_DATAFLOW']._serialized_start=4548
+ _globals['_DATAFLOW']._serialized_end=4804
+ _globals['_DATAFLOW_PIPELINEDATAFLOW']._serialized_start=4676
+ _globals['_DATAFLOW_PIPELINEDATAFLOW']._serialized_end=4796
+ _globals['_PIPELINELINEAGES']._serialized_start=4807
+ _globals['_PIPELINELINEAGES']._serialized_end=4963
+ _globals['_DATASETPIPELINELINEAGES']._serialized_start=4965
+ _globals['_DATASETPIPELINELINEAGES']._serialized_end=5057
+ _globals['_DATASETLINEAGES']._serialized_start=5060
+ _globals['_DATASETLINEAGES']._serialized_end=5199
# @@protoc_insertion_point(module_scope)
diff --git a/fennel/gen/dataset_pb2.pyi b/fennel/gen/dataset_pb2.pyi
index 4c4da1118..881427cea 100644
--- a/fennel/gen/dataset_pb2.pyi
+++ b/fennel/gen/dataset_pb2.pyi
@@ -371,6 +371,7 @@ class Join(google.protobuf.message.Message):
RHS_DSREF_OPERAND_NAME_FIELD_NUMBER: builtins.int
HOW_FIELD_NUMBER: builtins.int
BROADCAST_FIELD_NUMBER: builtins.int
+ RHS_FIELDS_FIELD_NUMBER: builtins.int
lhs_operand_id: builtins.str
rhs_dsref_operand_id: builtins.str
"""RHS of a JOIN can only be a dataset, here it refers to the DSRef operator"""
@@ -388,6 +389,9 @@ class Join(google.protobuf.message.Message):
rhs_dsref_operand_name: builtins.str
how: global___Join.How.ValueType
broadcast: builtins.bool
+ @property
+ def rhs_fields(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]:
+ """Only select a subset of fields from RHS, empty means select all"""
def __init__(
self,
*,
@@ -400,9 +404,10 @@ class Join(google.protobuf.message.Message):
rhs_dsref_operand_name: builtins.str = ...,
how: global___Join.How.ValueType = ...,
broadcast: builtins.bool = ...,
+ rhs_fields: collections.abc.Iterable[builtins.str] | None = ...,
) -> None: ...
def HasField(self, field_name: typing_extensions.Literal["_within_high", b"_within_high", "_within_low", b"_within_low", "within_high", b"within_high", "within_low", b"within_low"]) -> builtins.bool: ...
- def ClearField(self, field_name: typing_extensions.Literal["_within_high", b"_within_high", "_within_low", b"_within_low", "broadcast", b"broadcast", "how", b"how", "lhs_operand_id", b"lhs_operand_id", "lhs_operand_name", b"lhs_operand_name", "on", b"on", "rhs_dsref_operand_id", b"rhs_dsref_operand_id", "rhs_dsref_operand_name", b"rhs_dsref_operand_name", "within_high", b"within_high", "within_low", b"within_low"]) -> None: ...
+ def ClearField(self, field_name: typing_extensions.Literal["_within_high", b"_within_high", "_within_low", b"_within_low", "broadcast", b"broadcast", "how", b"how", "lhs_operand_id", b"lhs_operand_id", "lhs_operand_name", b"lhs_operand_name", "on", b"on", "rhs_dsref_operand_id", b"rhs_dsref_operand_id", "rhs_dsref_operand_name", b"rhs_dsref_operand_name", "rhs_fields", b"rhs_fields", "within_high", b"within_high", "within_low", b"within_low"]) -> None: ...
@typing.overload
def WhichOneof(self, oneof_group: typing_extensions.Literal["_within_high", b"_within_high"]) -> typing_extensions.Literal["within_high"] | None: ...
@typing.overload
diff --git a/fennel/gen/expr_pb2.py b/fennel/gen/expr_pb2.py
index d3391b295..69df78aa2 100644
--- a/fennel/gen/expr_pb2.py
+++ b/fennel/gen/expr_pb2.py
@@ -14,7 +14,7 @@
import fennel.gen.schema_pb2 as schema__pb2
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\nexpr.proto\x12\x11\x66\x65nnel.proto.expr\x1a\x0cschema.proto\"\x9a\x06\n\x04\x45xpr\x12%\n\x03ref\x18\x01 \x01(\x0b\x32\x16.fennel.proto.expr.RefH\x00\x12\x36\n\x0cjson_literal\x18\x02 \x01(\x0b\x32\x1e.fennel.proto.expr.JsonLiteralH\x00\x12)\n\x05unary\x18\x04 \x01(\x0b\x32\x18.fennel.proto.expr.UnaryH\x00\x12\'\n\x04\x63\x61se\x18\x05 \x01(\x0b\x32\x17.fennel.proto.expr.CaseH\x00\x12+\n\x06\x62inary\x18\x06 \x01(\x0b\x32\x19.fennel.proto.expr.BinaryH\x00\x12+\n\x06isnull\x18\x07 \x01(\x0b\x32\x19.fennel.proto.expr.IsNullH\x00\x12/\n\x08\x66illnull\x18\x08 \x01(\x0b\x32\x1b.fennel.proto.expr.FillNullH\x00\x12,\n\x07list_fn\x18\t \x01(\x0b\x32\x19.fennel.proto.expr.ListFnH\x00\x12,\n\x07math_fn\x18\n \x01(\x0b\x32\x19.fennel.proto.expr.MathFnH\x00\x12\x30\n\tstruct_fn\x18\x0b \x01(\x0b\x32\x1b.fennel.proto.expr.StructFnH\x00\x12,\n\x07\x64ict_fn\x18\x0c \x01(\x0b\x32\x19.fennel.proto.expr.DictFnH\x00\x12\x30\n\tstring_fn\x18\r \x01(\x0b\x32\x1b.fennel.proto.expr.StringFnH\x00\x12\x34\n\x0b\x64\x61tetime_fn\x18\x0e \x01(\x0b\x32\x1d.fennel.proto.expr.DateTimeFnH\x00\x12>\n\x10\x64\x61tetime_literal\x18\x0f \x01(\x0b\x32\".fennel.proto.expr.DatetimeLiteralH\x00\x12\x34\n\x0bmake_struct\x18\x10 \x01(\x0b\x32\x1d.fennel.proto.expr.MakeStructH\x00\x12\x32\n\nfrom_epoch\x18\x11 \x01(\x0b\x32\x1c.fennel.proto.expr.FromEpochH\x00\x42\x06\n\x04node\"a\n\tFromEpoch\x12)\n\x08\x64uration\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12)\n\x04unit\x18\x02 \x01(\x0e\x32\x1b.fennel.proto.expr.TimeUnit\"\xad\x01\n\x0f\x44\x61tetimeLiteral\x12\x0c\n\x04year\x18\x01 \x01(\r\x12\r\n\x05month\x18\x02 \x01(\r\x12\x0b\n\x03\x64\x61y\x18\x03 \x01(\r\x12\x0c\n\x04hour\x18\x04 \x01(\r\x12\x0e\n\x06minute\x18\x05 \x01(\r\x12\x0e\n\x06second\x18\x06 \x01(\r\x12\x13\n\x0bmicrosecond\x18\x07 \x01(\r\x12-\n\x08timezone\x18\x08 
\x01(\x0b\x32\x1b.fennel.proto.expr.Timezone\"\xc5\x01\n\nMakeStruct\x12\x34\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x1f.fennel.proto.schema.StructType\x12\x39\n\x06\x66ields\x18\x02 \x03(\x0b\x32).fennel.proto.expr.MakeStruct.FieldsEntry\x1a\x46\n\x0b\x46ieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12&\n\x05value\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr:\x02\x38\x01\"L\n\x0bJsonLiteral\x12\x0f\n\x07literal\x18\x01 \x01(\t\x12,\n\x05\x64type\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType\"\x13\n\x03Ref\x12\x0c\n\x04name\x18\x01 \x01(\t\"Y\n\x05Unary\x12&\n\x02op\x18\x01 \x01(\x0e\x32\x1a.fennel.proto.expr.UnaryOp\x12(\n\x07operand\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"}\n\x06\x42inary\x12%\n\x04left\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12&\n\x05right\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12$\n\x02op\x18\x03 \x01(\x0e\x32\x18.fennel.proto.expr.BinOp\"b\n\x04\x43\x61se\x12.\n\twhen_then\x18\x01 \x03(\x0b\x32\x1b.fennel.proto.expr.WhenThen\x12*\n\totherwise\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"X\n\x08WhenThen\x12%\n\x04when\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x04then\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"2\n\x06IsNull\x12(\n\x07operand\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"[\n\x08\x46illNull\x12(\n\x07operand\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x04\x66ill\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"\xc3\x01\n\x06ListOp\x12%\n\x03len\x18\x01 \x01(\x0b\x32\x16.fennel.proto.expr.LenH\x00\x12&\n\x03get\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.ExprH\x00\x12/\n\x08\x63ontains\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.expr.ContainsH\x00\x12.\n\x08has_null\x18\x04 \x01(\x0b\x32\x1a.fennel.proto.expr.HasNullH\x00\x42\t\n\x07\x66n_type\"\x05\n\x03Len\"\t\n\x07HasNull\"4\n\x08\x43ontains\x12(\n\x07\x65lement\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"V\n\x06ListFn\x12%\n\x04list\x18\x01 
\x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x02\x66n\x18\x02 \x01(\x0b\x32\x19.fennel.proto.expr.ListOp\"\xb9\x01\n\x06MathOp\x12)\n\x05round\x18\x01 \x01(\x0b\x32\x18.fennel.proto.expr.RoundH\x00\x12%\n\x03\x61\x62s\x18\x02 \x01(\x0b\x32\x16.fennel.proto.expr.AbsH\x00\x12\'\n\x04\x63\x65il\x18\x03 \x01(\x0b\x32\x17.fennel.proto.expr.CeilH\x00\x12)\n\x05\x66loor\x18\x04 \x01(\x0b\x32\x18.fennel.proto.expr.FloorH\x00\x42\t\n\x07\x66n_type\"\x1a\n\x05Round\x12\x11\n\tprecision\x18\x01 \x01(\x05\"\x05\n\x03\x41\x62s\"\x06\n\x04\x43\x65il\"\x07\n\x05\x46loor\"Y\n\x06MathFn\x12(\n\x07operand\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x02\x66n\x18\x02 \x01(\x0b\x32\x19.fennel.proto.expr.MathOp\"&\n\x08StructOp\x12\x0f\n\x05\x66ield\x18\x01 \x01(\tH\x00\x42\t\n\x07\x66n_type\"\\\n\x08StructFn\x12\'\n\x06struct\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12\'\n\x02\x66n\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.expr.StructOp\"a\n\x07\x44ictGet\x12&\n\x05\x66ield\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12.\n\rdefault_value\x18\x03 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"\x96\x01\n\x06\x44ictOp\x12%\n\x03len\x18\x01 \x01(\x0b\x32\x16.fennel.proto.expr.LenH\x00\x12)\n\x03get\x18\x02 \x01(\x0b\x32\x1a.fennel.proto.expr.DictGetH\x00\x12/\n\x08\x63ontains\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.expr.ContainsH\x00\x42\t\n\x07\x66n_type\"V\n\x06\x44ictFn\x12%\n\x04\x64ict\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x02\x66n\x18\x02 \x01(\x0b\x32\x19.fennel.proto.expr.DictOp\"\xc5\x03\n\x08StringOp\x12%\n\x03len\x18\x01 \x01(\x0b\x32\x16.fennel.proto.expr.LenH\x00\x12-\n\x07tolower\x18\x02 \x01(\x0b\x32\x1a.fennel.proto.expr.ToLowerH\x00\x12-\n\x07toupper\x18\x03 \x01(\x0b\x32\x1a.fennel.proto.expr.ToUpperH\x00\x12/\n\x08\x63ontains\x18\x04 \x01(\x0b\x32\x1b.fennel.proto.expr.ContainsH\x00\x12\x33\n\nstartswith\x18\x05 \x01(\x0b\x32\x1d.fennel.proto.expr.StartsWithH\x00\x12/\n\x08\x65ndswith\x18\x06 
\x01(\x0b\x32\x1b.fennel.proto.expr.EndsWithH\x00\x12+\n\x06\x63oncat\x18\x07 \x01(\x0b\x32\x19.fennel.proto.expr.ConcatH\x00\x12/\n\x08strptime\x18\x08 \x01(\x0b\x32\x1b.fennel.proto.expr.StrptimeH\x00\x12\x34\n\x0bjson_decode\x18\t \x01(\x0b\x32\x1d.fennel.proto.expr.JsonDecodeH\x00\x42\t\n\x07\x66n_type\"\x1c\n\x08Timezone\x12\x10\n\x08timezone\x18\x01 \x01(\t\":\n\nJsonDecode\x12,\n\x05\x64type\x18\x01 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType\"I\n\x08Strptime\x12\x0e\n\x06\x66ormat\x18\x01 \x01(\t\x12-\n\x08timezone\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.expr.Timezone\"\t\n\x07ToLower\"\t\n\x07ToUpper\"2\n\nStartsWith\x12$\n\x03key\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"0\n\x08\x45ndsWith\x12$\n\x03key\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"0\n\x06\x43oncat\x12&\n\x05other\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"\\\n\x08StringFn\x12\'\n\x06string\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12\'\n\x02\x66n\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.expr.StringOp\"b\n\nDateTimeFn\x12)\n\x08\x64\x61tetime\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12)\n\x02\x66n\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.expr.DateTimeOp\"\xd2\x01\n\nDateTimeOp\x12)\n\x05since\x18\x01 \x01(\x0b\x32\x18.fennel.proto.expr.SinceH\x00\x12\x34\n\x0bsince_epoch\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.expr.SinceEpochH\x00\x12/\n\x08strftime\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.expr.StrftimeH\x00\x12\'\n\x04part\x18\x04 \x01(\x0b\x32\x17.fennel.proto.expr.PartH\x00\x42\t\n\x07\x66n_type\"Z\n\x05Since\x12&\n\x05other\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12)\n\x04unit\x18\x02 \x01(\x0e\x32\x1b.fennel.proto.expr.TimeUnit\"7\n\nSinceEpoch\x12)\n\x04unit\x18\x01 \x01(\x0e\x32\x1b.fennel.proto.expr.TimeUnit\"\x1a\n\x08Strftime\x12\x0e\n\x06\x66ormat\x18\x01 \x01(\t\"1\n\x04Part\x12)\n\x04unit\x18\x01 
\x01(\x0e\x32\x1b.fennel.proto.expr.TimeUnit*\x1b\n\x07UnaryOp\x12\x07\n\x03NEG\x10\x00\x12\x07\n\x03NOT\x10\x01*\x86\x01\n\x05\x42inOp\x12\x07\n\x03\x41\x44\x44\x10\x00\x12\x07\n\x03SUB\x10\x01\x12\x07\n\x03MUL\x10\x02\x12\x07\n\x03\x44IV\x10\x03\x12\x07\n\x03MOD\x10\x04\x12\r\n\tFLOOR_DIV\x10\x05\x12\x06\n\x02\x45Q\x10\x06\x12\x06\n\x02NE\x10\x07\x12\x06\n\x02GT\x10\x08\x12\x07\n\x03GTE\x10\t\x12\x06\n\x02LT\x10\n\x12\x07\n\x03LTE\x10\x0b\x12\x07\n\x03\x41ND\x10\x0c\x12\x06\n\x02OR\x10\r*\x83\x01\n\x08TimeUnit\x12\x0b\n\x07UNKNOWN\x10\x00\x12\n\n\x06SECOND\x10\x01\x12\n\n\x06MINUTE\x10\x02\x12\x08\n\x04HOUR\x10\x03\x12\x07\n\x03\x44\x41Y\x10\x04\x12\x08\n\x04WEEK\x10\x05\x12\t\n\x05MONTH\x10\x06\x12\x08\n\x04YEAR\x10\x07\x12\x0f\n\x0bMICROSECOND\x10\x08\x12\x0f\n\x0bMILLISECOND\x10\tb\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\nexpr.proto\x12\x11\x66\x65nnel.proto.expr\x1a\x0cschema.proto\"\xc1\x06\n\x04\x45xpr\x12%\n\x03ref\x18\x01 \x01(\x0b\x32\x16.fennel.proto.expr.RefH\x00\x12\x36\n\x0cjson_literal\x18\x02 \x01(\x0b\x32\x1e.fennel.proto.expr.JsonLiteralH\x00\x12)\n\x05unary\x18\x04 \x01(\x0b\x32\x18.fennel.proto.expr.UnaryH\x00\x12\'\n\x04\x63\x61se\x18\x05 \x01(\x0b\x32\x17.fennel.proto.expr.CaseH\x00\x12+\n\x06\x62inary\x18\x06 \x01(\x0b\x32\x19.fennel.proto.expr.BinaryH\x00\x12+\n\x06isnull\x18\x07 \x01(\x0b\x32\x19.fennel.proto.expr.IsNullH\x00\x12/\n\x08\x66illnull\x18\x08 \x01(\x0b\x32\x1b.fennel.proto.expr.FillNullH\x00\x12,\n\x07list_fn\x18\t \x01(\x0b\x32\x19.fennel.proto.expr.ListFnH\x00\x12,\n\x07math_fn\x18\n \x01(\x0b\x32\x19.fennel.proto.expr.MathFnH\x00\x12\x30\n\tstruct_fn\x18\x0b \x01(\x0b\x32\x1b.fennel.proto.expr.StructFnH\x00\x12,\n\x07\x64ict_fn\x18\x0c \x01(\x0b\x32\x19.fennel.proto.expr.DictFnH\x00\x12\x30\n\tstring_fn\x18\r \x01(\x0b\x32\x1b.fennel.proto.expr.StringFnH\x00\x12\x34\n\x0b\x64\x61tetime_fn\x18\x0e \x01(\x0b\x32\x1d.fennel.proto.expr.DateTimeFnH\x00\x12>\n\x10\x64\x61tetime_literal\x18\x0f \x01(\x0b\x32\".fennel.proto.expr.DatetimeLiteralH\x00\x12\x34\n\x0bmake_struct\x18\x10 \x01(\x0b\x32\x1d.fennel.proto.expr.MakeStructH\x00\x12\x32\n\nfrom_epoch\x18\x11 \x01(\x0b\x32\x1c.fennel.proto.expr.FromEpochH\x00\x12%\n\x03var\x18\x12 \x01(\x0b\x32\x16.fennel.proto.expr.VarH\x00\x42\x06\n\x04node\"\x13\n\x03Var\x12\x0c\n\x04name\x18\x01 \x01(\t\"a\n\tFromEpoch\x12)\n\x08\x64uration\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12)\n\x04unit\x18\x02 \x01(\x0e\x32\x1b.fennel.proto.expr.TimeUnit\"\xad\x01\n\x0f\x44\x61tetimeLiteral\x12\x0c\n\x04year\x18\x01 \x01(\r\x12\r\n\x05month\x18\x02 \x01(\r\x12\x0b\n\x03\x64\x61y\x18\x03 \x01(\r\x12\x0c\n\x04hour\x18\x04 \x01(\r\x12\x0e\n\x06minute\x18\x05 \x01(\r\x12\x0e\n\x06second\x18\x06 \x01(\r\x12\x13\n\x0bmicrosecond\x18\x07 
\x01(\r\x12-\n\x08timezone\x18\x08 \x01(\x0b\x32\x1b.fennel.proto.expr.Timezone\"\xc5\x01\n\nMakeStruct\x12\x34\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x1f.fennel.proto.schema.StructType\x12\x39\n\x06\x66ields\x18\x02 \x03(\x0b\x32).fennel.proto.expr.MakeStruct.FieldsEntry\x1a\x46\n\x0b\x46ieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12&\n\x05value\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr:\x02\x38\x01\"L\n\x0bJsonLiteral\x12\x0f\n\x07literal\x18\x01 \x01(\t\x12,\n\x05\x64type\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType\"\x13\n\x03Ref\x12\x0c\n\x04name\x18\x01 \x01(\t\"Y\n\x05Unary\x12&\n\x02op\x18\x01 \x01(\x0e\x32\x1a.fennel.proto.expr.UnaryOp\x12(\n\x07operand\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"}\n\x06\x42inary\x12%\n\x04left\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12&\n\x05right\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12$\n\x02op\x18\x03 \x01(\x0e\x32\x18.fennel.proto.expr.BinOp\"b\n\x04\x43\x61se\x12.\n\twhen_then\x18\x01 \x03(\x0b\x32\x1b.fennel.proto.expr.WhenThen\x12*\n\totherwise\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"X\n\x08WhenThen\x12%\n\x04when\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x04then\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"2\n\x06IsNull\x12(\n\x07operand\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"[\n\x08\x46illNull\x12(\n\x07operand\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x04\x66ill\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"\xa3\x04\n\x06ListOp\x12%\n\x03len\x18\x01 \x01(\x0b\x32\x16.fennel.proto.expr.LenH\x00\x12&\n\x03get\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.ExprH\x00\x12/\n\x08\x63ontains\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.expr.ContainsH\x00\x12.\n\x08has_null\x18\x04 \x01(\x0b\x32\x1a.fennel.proto.expr.HasNullH\x00\x12)\n\x03sum\x18\x05 \x01(\x0b\x32\x1a.fennel.proto.expr.ListSumH\x00\x12)\n\x03min\x18\x06 \x01(\x0b\x32\x1a.fennel.proto.expr.ListMinH\x00\x12)\n\x03max\x18\x07 
\x01(\x0b\x32\x1a.fennel.proto.expr.ListMaxH\x00\x12)\n\x03\x61ll\x18\x08 \x01(\x0b\x32\x1a.fennel.proto.expr.ListAllH\x00\x12)\n\x03\x61ny\x18\t \x01(\x0b\x32\x1a.fennel.proto.expr.ListAnyH\x00\x12+\n\x04mean\x18\n \x01(\x0b\x32\x1b.fennel.proto.expr.ListMeanH\x00\x12/\n\x06\x66ilter\x18\x0b \x01(\x0b\x32\x1d.fennel.proto.expr.ListFilterH\x00\x12)\n\x03map\x18\x0c \x01(\x0b\x32\x1a.fennel.proto.expr.ListMapH\x00\x42\t\n\x07\x66n_type\"E\n\nListFilter\x12\x0b\n\x03var\x18\x01 \x01(\t\x12*\n\tpredicate\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"A\n\x07ListMap\x12\x0b\n\x03var\x18\x01 \x01(\t\x12)\n\x08map_expr\x18\x02 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"\t\n\x07ListSum\"\t\n\x07ListMin\"\n\n\x08ListMean\"\t\n\x07ListMax\"\t\n\x07ListAll\"\t\n\x07ListAny\"\x05\n\x03Len\"\t\n\x07HasNull\"4\n\x08\x43ontains\x12(\n\x07\x65lement\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"V\n\x06ListFn\x12%\n\x04list\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x02\x66n\x18\x02 \x01(\x0b\x32\x19.fennel.proto.expr.ListOp\"\xb9\x01\n\x06MathOp\x12)\n\x05round\x18\x01 \x01(\x0b\x32\x18.fennel.proto.expr.RoundH\x00\x12%\n\x03\x61\x62s\x18\x02 \x01(\x0b\x32\x16.fennel.proto.expr.AbsH\x00\x12\'\n\x04\x63\x65il\x18\x03 \x01(\x0b\x32\x17.fennel.proto.expr.CeilH\x00\x12)\n\x05\x66loor\x18\x04 \x01(\x0b\x32\x18.fennel.proto.expr.FloorH\x00\x42\t\n\x07\x66n_type\"\x1a\n\x05Round\x12\x11\n\tprecision\x18\x01 \x01(\x05\"\x05\n\x03\x41\x62s\"\x06\n\x04\x43\x65il\"\x07\n\x05\x46loor\"Y\n\x06MathFn\x12(\n\x07operand\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x02\x66n\x18\x02 \x01(\x0b\x32\x19.fennel.proto.expr.MathOp\"&\n\x08StructOp\x12\x0f\n\x05\x66ield\x18\x01 \x01(\tH\x00\x42\t\n\x07\x66n_type\"\\\n\x08StructFn\x12\'\n\x06struct\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12\'\n\x02\x66n\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.expr.StructOp\"a\n\x07\x44ictGet\x12&\n\x05\x66ield\x18\x01 
\x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12.\n\rdefault_value\x18\x03 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"\x96\x01\n\x06\x44ictOp\x12%\n\x03len\x18\x01 \x01(\x0b\x32\x16.fennel.proto.expr.LenH\x00\x12)\n\x03get\x18\x02 \x01(\x0b\x32\x1a.fennel.proto.expr.DictGetH\x00\x12/\n\x08\x63ontains\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.expr.ContainsH\x00\x42\t\n\x07\x66n_type\"V\n\x06\x44ictFn\x12%\n\x04\x64ict\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12%\n\x02\x66n\x18\x02 \x01(\x0b\x32\x19.fennel.proto.expr.DictOp\"\xc5\x03\n\x08StringOp\x12%\n\x03len\x18\x01 \x01(\x0b\x32\x16.fennel.proto.expr.LenH\x00\x12-\n\x07tolower\x18\x02 \x01(\x0b\x32\x1a.fennel.proto.expr.ToLowerH\x00\x12-\n\x07toupper\x18\x03 \x01(\x0b\x32\x1a.fennel.proto.expr.ToUpperH\x00\x12/\n\x08\x63ontains\x18\x04 \x01(\x0b\x32\x1b.fennel.proto.expr.ContainsH\x00\x12\x33\n\nstartswith\x18\x05 \x01(\x0b\x32\x1d.fennel.proto.expr.StartsWithH\x00\x12/\n\x08\x65ndswith\x18\x06 \x01(\x0b\x32\x1b.fennel.proto.expr.EndsWithH\x00\x12+\n\x06\x63oncat\x18\x07 \x01(\x0b\x32\x19.fennel.proto.expr.ConcatH\x00\x12/\n\x08strptime\x18\x08 \x01(\x0b\x32\x1b.fennel.proto.expr.StrptimeH\x00\x12\x34\n\x0bjson_decode\x18\t \x01(\x0b\x32\x1d.fennel.proto.expr.JsonDecodeH\x00\x42\t\n\x07\x66n_type\"\x1c\n\x08Timezone\x12\x10\n\x08timezone\x18\x01 \x01(\t\":\n\nJsonDecode\x12,\n\x05\x64type\x18\x01 \x01(\x0b\x32\x1d.fennel.proto.schema.DataType\"I\n\x08Strptime\x12\x0e\n\x06\x66ormat\x18\x01 \x01(\t\x12-\n\x08timezone\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.expr.Timezone\"\t\n\x07ToLower\"\t\n\x07ToUpper\"2\n\nStartsWith\x12$\n\x03key\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"0\n\x08\x45ndsWith\x12$\n\x03key\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"0\n\x06\x43oncat\x12&\n\x05other\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\"\\\n\x08StringFn\x12\'\n\x06string\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12\'\n\x02\x66n\x18\x02 
\x01(\x0b\x32\x1b.fennel.proto.expr.StringOp\"b\n\nDateTimeFn\x12)\n\x08\x64\x61tetime\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12)\n\x02\x66n\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.expr.DateTimeOp\"\xd2\x01\n\nDateTimeOp\x12)\n\x05since\x18\x01 \x01(\x0b\x32\x18.fennel.proto.expr.SinceH\x00\x12\x34\n\x0bsince_epoch\x18\x02 \x01(\x0b\x32\x1d.fennel.proto.expr.SinceEpochH\x00\x12/\n\x08strftime\x18\x03 \x01(\x0b\x32\x1b.fennel.proto.expr.StrftimeH\x00\x12\'\n\x04part\x18\x04 \x01(\x0b\x32\x17.fennel.proto.expr.PartH\x00\x42\t\n\x07\x66n_type\"Z\n\x05Since\x12&\n\x05other\x18\x01 \x01(\x0b\x32\x17.fennel.proto.expr.Expr\x12)\n\x04unit\x18\x02 \x01(\x0e\x32\x1b.fennel.proto.expr.TimeUnit\"7\n\nSinceEpoch\x12)\n\x04unit\x18\x01 \x01(\x0e\x32\x1b.fennel.proto.expr.TimeUnit\"I\n\x08Strftime\x12\x0e\n\x06\x66ormat\x18\x01 \x01(\t\x12-\n\x08timezone\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.expr.Timezone\"`\n\x04Part\x12)\n\x04unit\x18\x01 \x01(\x0e\x32\x1b.fennel.proto.expr.TimeUnit\x12-\n\x08timezone\x18\x02 \x01(\x0b\x32\x1b.fennel.proto.expr.Timezone*\x1b\n\x07UnaryOp\x12\x07\n\x03NEG\x10\x00\x12\x07\n\x03NOT\x10\x01*\x86\x01\n\x05\x42inOp\x12\x07\n\x03\x41\x44\x44\x10\x00\x12\x07\n\x03SUB\x10\x01\x12\x07\n\x03MUL\x10\x02\x12\x07\n\x03\x44IV\x10\x03\x12\x07\n\x03MOD\x10\x04\x12\r\n\tFLOOR_DIV\x10\x05\x12\x06\n\x02\x45Q\x10\x06\x12\x06\n\x02NE\x10\x07\x12\x06\n\x02GT\x10\x08\x12\x07\n\x03GTE\x10\t\x12\x06\n\x02LT\x10\n\x12\x07\n\x03LTE\x10\x0b\x12\x07\n\x03\x41ND\x10\x0c\x12\x06\n\x02OR\x10\r*\x83\x01\n\x08TimeUnit\x12\x0b\n\x07UNKNOWN\x10\x00\x12\n\n\x06SECOND\x10\x01\x12\n\n\x06MINUTE\x10\x02\x12\x08\n\x04HOUR\x10\x03\x12\x07\n\x03\x44\x41Y\x10\x04\x12\x08\n\x04WEEK\x10\x05\x12\t\n\x05MONTH\x10\x06\x12\x08\n\x04YEAR\x10\x07\x12\x0f\n\x0bMICROSECOND\x10\x08\x12\x0f\n\x0bMILLISECOND\x10\tb\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -23,100 +23,118 @@
DESCRIPTOR._options = None
_globals['_MAKESTRUCT_FIELDSENTRY']._options = None
_globals['_MAKESTRUCT_FIELDSENTRY']._serialized_options = b'8\001'
- _globals['_UNARYOP']._serialized_start=4564
- _globals['_UNARYOP']._serialized_end=4591
- _globals['_BINOP']._serialized_start=4594
- _globals['_BINOP']._serialized_end=4728
- _globals['_TIMEUNIT']._serialized_start=4731
- _globals['_TIMEUNIT']._serialized_end=4862
+ _globals['_UNARYOP']._serialized_start=5275
+ _globals['_UNARYOP']._serialized_end=5302
+ _globals['_BINOP']._serialized_start=5305
+ _globals['_BINOP']._serialized_end=5439
+ _globals['_TIMEUNIT']._serialized_start=5442
+ _globals['_TIMEUNIT']._serialized_end=5573
_globals['_EXPR']._serialized_start=48
- _globals['_EXPR']._serialized_end=842
- _globals['_FROMEPOCH']._serialized_start=844
- _globals['_FROMEPOCH']._serialized_end=941
- _globals['_DATETIMELITERAL']._serialized_start=944
- _globals['_DATETIMELITERAL']._serialized_end=1117
- _globals['_MAKESTRUCT']._serialized_start=1120
- _globals['_MAKESTRUCT']._serialized_end=1317
- _globals['_MAKESTRUCT_FIELDSENTRY']._serialized_start=1247
- _globals['_MAKESTRUCT_FIELDSENTRY']._serialized_end=1317
- _globals['_JSONLITERAL']._serialized_start=1319
- _globals['_JSONLITERAL']._serialized_end=1395
- _globals['_REF']._serialized_start=1397
- _globals['_REF']._serialized_end=1416
- _globals['_UNARY']._serialized_start=1418
- _globals['_UNARY']._serialized_end=1507
- _globals['_BINARY']._serialized_start=1509
- _globals['_BINARY']._serialized_end=1634
- _globals['_CASE']._serialized_start=1636
- _globals['_CASE']._serialized_end=1734
- _globals['_WHENTHEN']._serialized_start=1736
- _globals['_WHENTHEN']._serialized_end=1824
- _globals['_ISNULL']._serialized_start=1826
- _globals['_ISNULL']._serialized_end=1876
- _globals['_FILLNULL']._serialized_start=1878
- _globals['_FILLNULL']._serialized_end=1969
- _globals['_LISTOP']._serialized_start=1972
- _globals['_LISTOP']._serialized_end=2167
- _globals['_LEN']._serialized_start=2169
- _globals['_LEN']._serialized_end=2174
- _globals['_HASNULL']._serialized_start=2176
- _globals['_HASNULL']._serialized_end=2185
- _globals['_CONTAINS']._serialized_start=2187
- _globals['_CONTAINS']._serialized_end=2239
- _globals['_LISTFN']._serialized_start=2241
- _globals['_LISTFN']._serialized_end=2327
- _globals['_MATHOP']._serialized_start=2330
- _globals['_MATHOP']._serialized_end=2515
- _globals['_ROUND']._serialized_start=2517
- _globals['_ROUND']._serialized_end=2543
- _globals['_ABS']._serialized_start=2545
- _globals['_ABS']._serialized_end=2550
- _globals['_CEIL']._serialized_start=2552
- _globals['_CEIL']._serialized_end=2558
- _globals['_FLOOR']._serialized_start=2560
- _globals['_FLOOR']._serialized_end=2567
- _globals['_MATHFN']._serialized_start=2569
- _globals['_MATHFN']._serialized_end=2658
- _globals['_STRUCTOP']._serialized_start=2660
- _globals['_STRUCTOP']._serialized_end=2698
- _globals['_STRUCTFN']._serialized_start=2700
- _globals['_STRUCTFN']._serialized_end=2792
- _globals['_DICTGET']._serialized_start=2794
- _globals['_DICTGET']._serialized_end=2891
- _globals['_DICTOP']._serialized_start=2894
- _globals['_DICTOP']._serialized_end=3044
- _globals['_DICTFN']._serialized_start=3046
- _globals['_DICTFN']._serialized_end=3132
- _globals['_STRINGOP']._serialized_start=3135
- _globals['_STRINGOP']._serialized_end=3588
- _globals['_TIMEZONE']._serialized_start=3590
- _globals['_TIMEZONE']._serialized_end=3618
- _globals['_JSONDECODE']._serialized_start=3620
- _globals['_JSONDECODE']._serialized_end=3678
- _globals['_STRPTIME']._serialized_start=3680
- _globals['_STRPTIME']._serialized_end=3753
- _globals['_TOLOWER']._serialized_start=3755
- _globals['_TOLOWER']._serialized_end=3764
- _globals['_TOUPPER']._serialized_start=3766
- _globals['_TOUPPER']._serialized_end=3775
- _globals['_STARTSWITH']._serialized_start=3777
- _globals['_STARTSWITH']._serialized_end=3827
- _globals['_ENDSWITH']._serialized_start=3829
- _globals['_ENDSWITH']._serialized_end=3877
- _globals['_CONCAT']._serialized_start=3879
- _globals['_CONCAT']._serialized_end=3927
- _globals['_STRINGFN']._serialized_start=3929
- _globals['_STRINGFN']._serialized_end=4021
- _globals['_DATETIMEFN']._serialized_start=4023
- _globals['_DATETIMEFN']._serialized_end=4121
- _globals['_DATETIMEOP']._serialized_start=4124
- _globals['_DATETIMEOP']._serialized_end=4334
- _globals['_SINCE']._serialized_start=4336
- _globals['_SINCE']._serialized_end=4426
- _globals['_SINCEEPOCH']._serialized_start=4428
- _globals['_SINCEEPOCH']._serialized_end=4483
- _globals['_STRFTIME']._serialized_start=4485
- _globals['_STRFTIME']._serialized_end=4511
- _globals['_PART']._serialized_start=4513
- _globals['_PART']._serialized_end=4562
+ _globals['_EXPR']._serialized_end=881
+ _globals['_VAR']._serialized_start=883
+ _globals['_VAR']._serialized_end=902
+ _globals['_FROMEPOCH']._serialized_start=904
+ _globals['_FROMEPOCH']._serialized_end=1001
+ _globals['_DATETIMELITERAL']._serialized_start=1004
+ _globals['_DATETIMELITERAL']._serialized_end=1177
+ _globals['_MAKESTRUCT']._serialized_start=1180
+ _globals['_MAKESTRUCT']._serialized_end=1377
+ _globals['_MAKESTRUCT_FIELDSENTRY']._serialized_start=1307
+ _globals['_MAKESTRUCT_FIELDSENTRY']._serialized_end=1377
+ _globals['_JSONLITERAL']._serialized_start=1379
+ _globals['_JSONLITERAL']._serialized_end=1455
+ _globals['_REF']._serialized_start=1457
+ _globals['_REF']._serialized_end=1476
+ _globals['_UNARY']._serialized_start=1478
+ _globals['_UNARY']._serialized_end=1567
+ _globals['_BINARY']._serialized_start=1569
+ _globals['_BINARY']._serialized_end=1694
+ _globals['_CASE']._serialized_start=1696
+ _globals['_CASE']._serialized_end=1794
+ _globals['_WHENTHEN']._serialized_start=1796
+ _globals['_WHENTHEN']._serialized_end=1884
+ _globals['_ISNULL']._serialized_start=1886
+ _globals['_ISNULL']._serialized_end=1936
+ _globals['_FILLNULL']._serialized_start=1938
+ _globals['_FILLNULL']._serialized_end=2029
+ _globals['_LISTOP']._serialized_start=2032
+ _globals['_LISTOP']._serialized_end=2579
+ _globals['_LISTFILTER']._serialized_start=2581
+ _globals['_LISTFILTER']._serialized_end=2650
+ _globals['_LISTMAP']._serialized_start=2652
+ _globals['_LISTMAP']._serialized_end=2717
+ _globals['_LISTSUM']._serialized_start=2719
+ _globals['_LISTSUM']._serialized_end=2728
+ _globals['_LISTMIN']._serialized_start=2730
+ _globals['_LISTMIN']._serialized_end=2739
+ _globals['_LISTMEAN']._serialized_start=2741
+ _globals['_LISTMEAN']._serialized_end=2751
+ _globals['_LISTMAX']._serialized_start=2753
+ _globals['_LISTMAX']._serialized_end=2762
+ _globals['_LISTALL']._serialized_start=2764
+ _globals['_LISTALL']._serialized_end=2773
+ _globals['_LISTANY']._serialized_start=2775
+ _globals['_LISTANY']._serialized_end=2784
+ _globals['_LEN']._serialized_start=2786
+ _globals['_LEN']._serialized_end=2791
+ _globals['_HASNULL']._serialized_start=2793
+ _globals['_HASNULL']._serialized_end=2802
+ _globals['_CONTAINS']._serialized_start=2804
+ _globals['_CONTAINS']._serialized_end=2856
+ _globals['_LISTFN']._serialized_start=2858
+ _globals['_LISTFN']._serialized_end=2944
+ _globals['_MATHOP']._serialized_start=2947
+ _globals['_MATHOP']._serialized_end=3132
+ _globals['_ROUND']._serialized_start=3134
+ _globals['_ROUND']._serialized_end=3160
+ _globals['_ABS']._serialized_start=3162
+ _globals['_ABS']._serialized_end=3167
+ _globals['_CEIL']._serialized_start=3169
+ _globals['_CEIL']._serialized_end=3175
+ _globals['_FLOOR']._serialized_start=3177
+ _globals['_FLOOR']._serialized_end=3184
+ _globals['_MATHFN']._serialized_start=3186
+ _globals['_MATHFN']._serialized_end=3275
+ _globals['_STRUCTOP']._serialized_start=3277
+ _globals['_STRUCTOP']._serialized_end=3315
+ _globals['_STRUCTFN']._serialized_start=3317
+ _globals['_STRUCTFN']._serialized_end=3409
+ _globals['_DICTGET']._serialized_start=3411
+ _globals['_DICTGET']._serialized_end=3508
+ _globals['_DICTOP']._serialized_start=3511
+ _globals['_DICTOP']._serialized_end=3661
+ _globals['_DICTFN']._serialized_start=3663
+ _globals['_DICTFN']._serialized_end=3749
+ _globals['_STRINGOP']._serialized_start=3752
+ _globals['_STRINGOP']._serialized_end=4205
+ _globals['_TIMEZONE']._serialized_start=4207
+ _globals['_TIMEZONE']._serialized_end=4235
+ _globals['_JSONDECODE']._serialized_start=4237
+ _globals['_JSONDECODE']._serialized_end=4295
+ _globals['_STRPTIME']._serialized_start=4297
+ _globals['_STRPTIME']._serialized_end=4370
+ _globals['_TOLOWER']._serialized_start=4372
+ _globals['_TOLOWER']._serialized_end=4381
+ _globals['_TOUPPER']._serialized_start=4383
+ _globals['_TOUPPER']._serialized_end=4392
+ _globals['_STARTSWITH']._serialized_start=4394
+ _globals['_STARTSWITH']._serialized_end=4444
+ _globals['_ENDSWITH']._serialized_start=4446
+ _globals['_ENDSWITH']._serialized_end=4494
+ _globals['_CONCAT']._serialized_start=4496
+ _globals['_CONCAT']._serialized_end=4544
+ _globals['_STRINGFN']._serialized_start=4546
+ _globals['_STRINGFN']._serialized_end=4638
+ _globals['_DATETIMEFN']._serialized_start=4640
+ _globals['_DATETIMEFN']._serialized_end=4738
+ _globals['_DATETIMEOP']._serialized_start=4741
+ _globals['_DATETIMEOP']._serialized_end=4951
+ _globals['_SINCE']._serialized_start=4953
+ _globals['_SINCE']._serialized_end=5043
+ _globals['_SINCEEPOCH']._serialized_start=5045
+ _globals['_SINCEEPOCH']._serialized_end=5100
+ _globals['_STRFTIME']._serialized_start=5102
+ _globals['_STRFTIME']._serialized_end=5175
+ _globals['_PART']._serialized_start=5177
+ _globals['_PART']._serialized_end=5273
# @@protoc_insertion_point(module_scope)
diff --git a/fennel/gen/expr_pb2.pyi b/fennel/gen/expr_pb2.pyi
index b423c09cc..9c2911dd6 100644
--- a/fennel/gen/expr_pb2.pyi
+++ b/fennel/gen/expr_pb2.pyi
@@ -130,6 +130,7 @@ class Expr(google.protobuf.message.Message):
DATETIME_LITERAL_FIELD_NUMBER: builtins.int
MAKE_STRUCT_FIELD_NUMBER: builtins.int
FROM_EPOCH_FIELD_NUMBER: builtins.int
+ VAR_FIELD_NUMBER: builtins.int
@property
def ref(self) -> global___Ref: ...
@property
@@ -163,6 +164,8 @@ class Expr(google.protobuf.message.Message):
def make_struct(self) -> global___MakeStruct: ...
@property
def from_epoch(self) -> global___FromEpoch: ...
+ @property
+ def var(self) -> global___Var: ...
def __init__(
self,
*,
@@ -182,13 +185,29 @@ class Expr(google.protobuf.message.Message):
datetime_literal: global___DatetimeLiteral | None = ...,
make_struct: global___MakeStruct | None = ...,
from_epoch: global___FromEpoch | None = ...,
+ var: global___Var | None = ...,
) -> None: ...
- def HasField(self, field_name: typing_extensions.Literal["binary", b"binary", "case", b"case", "datetime_fn", b"datetime_fn", "datetime_literal", b"datetime_literal", "dict_fn", b"dict_fn", "fillnull", b"fillnull", "from_epoch", b"from_epoch", "isnull", b"isnull", "json_literal", b"json_literal", "list_fn", b"list_fn", "make_struct", b"make_struct", "math_fn", b"math_fn", "node", b"node", "ref", b"ref", "string_fn", b"string_fn", "struct_fn", b"struct_fn", "unary", b"unary"]) -> builtins.bool: ...
- def ClearField(self, field_name: typing_extensions.Literal["binary", b"binary", "case", b"case", "datetime_fn", b"datetime_fn", "datetime_literal", b"datetime_literal", "dict_fn", b"dict_fn", "fillnull", b"fillnull", "from_epoch", b"from_epoch", "isnull", b"isnull", "json_literal", b"json_literal", "list_fn", b"list_fn", "make_struct", b"make_struct", "math_fn", b"math_fn", "node", b"node", "ref", b"ref", "string_fn", b"string_fn", "struct_fn", b"struct_fn", "unary", b"unary"]) -> None: ...
- def WhichOneof(self, oneof_group: typing_extensions.Literal["node", b"node"]) -> typing_extensions.Literal["ref", "json_literal", "unary", "case", "binary", "isnull", "fillnull", "list_fn", "math_fn", "struct_fn", "dict_fn", "string_fn", "datetime_fn", "datetime_literal", "make_struct", "from_epoch"] | None: ...
+ def HasField(self, field_name: typing_extensions.Literal["binary", b"binary", "case", b"case", "datetime_fn", b"datetime_fn", "datetime_literal", b"datetime_literal", "dict_fn", b"dict_fn", "fillnull", b"fillnull", "from_epoch", b"from_epoch", "isnull", b"isnull", "json_literal", b"json_literal", "list_fn", b"list_fn", "make_struct", b"make_struct", "math_fn", b"math_fn", "node", b"node", "ref", b"ref", "string_fn", b"string_fn", "struct_fn", b"struct_fn", "unary", b"unary", "var", b"var"]) -> builtins.bool: ...
+ def ClearField(self, field_name: typing_extensions.Literal["binary", b"binary", "case", b"case", "datetime_fn", b"datetime_fn", "datetime_literal", b"datetime_literal", "dict_fn", b"dict_fn", "fillnull", b"fillnull", "from_epoch", b"from_epoch", "isnull", b"isnull", "json_literal", b"json_literal", "list_fn", b"list_fn", "make_struct", b"make_struct", "math_fn", b"math_fn", "node", b"node", "ref", b"ref", "string_fn", b"string_fn", "struct_fn", b"struct_fn", "unary", b"unary", "var", b"var"]) -> None: ...
+ def WhichOneof(self, oneof_group: typing_extensions.Literal["node", b"node"]) -> typing_extensions.Literal["ref", "json_literal", "unary", "case", "binary", "isnull", "fillnull", "list_fn", "math_fn", "struct_fn", "dict_fn", "string_fn", "datetime_fn", "datetime_literal", "make_struct", "from_epoch", "var"] | None: ...
global___Expr = Expr
+@typing_extensions.final
+class Var(google.protobuf.message.Message):
+ DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+ NAME_FIELD_NUMBER: builtins.int
+ name: builtins.str
+ def __init__(
+ self,
+ *,
+ name: builtins.str = ...,
+ ) -> None: ...
+ def ClearField(self, field_name: typing_extensions.Literal["name", b"name"]) -> None: ...
+
+global___Var = Var
+
@typing_extensions.final
class FromEpoch(google.protobuf.message.Message):
DESCRIPTOR: google.protobuf.descriptor.Descriptor
@@ -454,6 +473,14 @@ class ListOp(google.protobuf.message.Message):
GET_FIELD_NUMBER: builtins.int
CONTAINS_FIELD_NUMBER: builtins.int
HAS_NULL_FIELD_NUMBER: builtins.int
+ SUM_FIELD_NUMBER: builtins.int
+ MIN_FIELD_NUMBER: builtins.int
+ MAX_FIELD_NUMBER: builtins.int
+ ALL_FIELD_NUMBER: builtins.int
+ ANY_FIELD_NUMBER: builtins.int
+ MEAN_FIELD_NUMBER: builtins.int
+ FILTER_FIELD_NUMBER: builtins.int
+ MAP_FIELD_NUMBER: builtins.int
@property
def len(self) -> global___Len: ...
@property
@@ -464,6 +491,22 @@ class ListOp(google.protobuf.message.Message):
"""Check if the list contains an element"""
@property
def has_null(self) -> global___HasNull: ...
+ @property
+ def sum(self) -> global___ListSum: ...
+ @property
+ def min(self) -> global___ListMin: ...
+ @property
+ def max(self) -> global___ListMax: ...
+ @property
+ def all(self) -> global___ListAll: ...
+ @property
+ def any(self) -> global___ListAny: ...
+ @property
+ def mean(self) -> global___ListMean: ...
+ @property
+ def filter(self) -> global___ListFilter: ...
+ @property
+ def map(self) -> global___ListMap: ...
def __init__(
self,
*,
@@ -471,13 +514,121 @@ class ListOp(google.protobuf.message.Message):
get: global___Expr | None = ...,
contains: global___Contains | None = ...,
has_null: global___HasNull | None = ...,
+ sum: global___ListSum | None = ...,
+ min: global___ListMin | None = ...,
+ max: global___ListMax | None = ...,
+ all: global___ListAll | None = ...,
+ any: global___ListAny | None = ...,
+ mean: global___ListMean | None = ...,
+ filter: global___ListFilter | None = ...,
+ map: global___ListMap | None = ...,
) -> None: ...
- def HasField(self, field_name: typing_extensions.Literal["contains", b"contains", "fn_type", b"fn_type", "get", b"get", "has_null", b"has_null", "len", b"len"]) -> builtins.bool: ...
- def ClearField(self, field_name: typing_extensions.Literal["contains", b"contains", "fn_type", b"fn_type", "get", b"get", "has_null", b"has_null", "len", b"len"]) -> None: ...
- def WhichOneof(self, oneof_group: typing_extensions.Literal["fn_type", b"fn_type"]) -> typing_extensions.Literal["len", "get", "contains", "has_null"] | None: ...
+ def HasField(self, field_name: typing_extensions.Literal["all", b"all", "any", b"any", "contains", b"contains", "filter", b"filter", "fn_type", b"fn_type", "get", b"get", "has_null", b"has_null", "len", b"len", "map", b"map", "max", b"max", "mean", b"mean", "min", b"min", "sum", b"sum"]) -> builtins.bool: ...
+ def ClearField(self, field_name: typing_extensions.Literal["all", b"all", "any", b"any", "contains", b"contains", "filter", b"filter", "fn_type", b"fn_type", "get", b"get", "has_null", b"has_null", "len", b"len", "map", b"map", "max", b"max", "mean", b"mean", "min", b"min", "sum", b"sum"]) -> None: ...
+ def WhichOneof(self, oneof_group: typing_extensions.Literal["fn_type", b"fn_type"]) -> typing_extensions.Literal["len", "get", "contains", "has_null", "sum", "min", "max", "all", "any", "mean", "filter", "map"] | None: ...
global___ListOp = ListOp
+@typing_extensions.final
+class ListFilter(google.protobuf.message.Message):
+ DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+ VAR_FIELD_NUMBER: builtins.int
+ PREDICATE_FIELD_NUMBER: builtins.int
+ var: builtins.str
+ @property
+ def predicate(self) -> global___Expr: ...
+ def __init__(
+ self,
+ *,
+ var: builtins.str = ...,
+ predicate: global___Expr | None = ...,
+ ) -> None: ...
+ def HasField(self, field_name: typing_extensions.Literal["predicate", b"predicate"]) -> builtins.bool: ...
+ def ClearField(self, field_name: typing_extensions.Literal["predicate", b"predicate", "var", b"var"]) -> None: ...
+
+global___ListFilter = ListFilter
+
+@typing_extensions.final
+class ListMap(google.protobuf.message.Message):
+ DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+ VAR_FIELD_NUMBER: builtins.int
+ MAP_EXPR_FIELD_NUMBER: builtins.int
+ var: builtins.str
+ @property
+ def map_expr(self) -> global___Expr: ...
+ def __init__(
+ self,
+ *,
+ var: builtins.str = ...,
+ map_expr: global___Expr | None = ...,
+ ) -> None: ...
+ def HasField(self, field_name: typing_extensions.Literal["map_expr", b"map_expr"]) -> builtins.bool: ...
+ def ClearField(self, field_name: typing_extensions.Literal["map_expr", b"map_expr", "var", b"var"]) -> None: ...
+
+global___ListMap = ListMap
+
+@typing_extensions.final
+class ListSum(google.protobuf.message.Message):
+ DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+ def __init__(
+ self,
+ ) -> None: ...
+
+global___ListSum = ListSum
+
+@typing_extensions.final
+class ListMin(google.protobuf.message.Message):
+ DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+ def __init__(
+ self,
+ ) -> None: ...
+
+global___ListMin = ListMin
+
+@typing_extensions.final
+class ListMean(google.protobuf.message.Message):
+ DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+ def __init__(
+ self,
+ ) -> None: ...
+
+global___ListMean = ListMean
+
+@typing_extensions.final
+class ListMax(google.protobuf.message.Message):
+ DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+ def __init__(
+ self,
+ ) -> None: ...
+
+global___ListMax = ListMax
+
+@typing_extensions.final
+class ListAll(google.protobuf.message.Message):
+ DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+ def __init__(
+ self,
+ ) -> None: ...
+
+global___ListAll = ListAll
+
+@typing_extensions.final
+class ListAny(google.protobuf.message.Message):
+ DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+ def __init__(
+ self,
+ ) -> None: ...
+
+global___ListAny = ListAny
+
@typing_extensions.final
class Len(google.protobuf.message.Message):
DESCRIPTOR: google.protobuf.descriptor.Descriptor
@@ -1023,13 +1174,18 @@ class Strftime(google.protobuf.message.Message):
DESCRIPTOR: google.protobuf.descriptor.Descriptor
FORMAT_FIELD_NUMBER: builtins.int
+ TIMEZONE_FIELD_NUMBER: builtins.int
format: builtins.str
+ @property
+ def timezone(self) -> global___Timezone: ...
def __init__(
self,
*,
format: builtins.str = ...,
+ timezone: global___Timezone | None = ...,
) -> None: ...
- def ClearField(self, field_name: typing_extensions.Literal["format", b"format"]) -> None: ...
+ def HasField(self, field_name: typing_extensions.Literal["timezone", b"timezone"]) -> builtins.bool: ...
+ def ClearField(self, field_name: typing_extensions.Literal["format", b"format", "timezone", b"timezone"]) -> None: ...
global___Strftime = Strftime
@@ -1038,12 +1194,17 @@ class Part(google.protobuf.message.Message):
DESCRIPTOR: google.protobuf.descriptor.Descriptor
UNIT_FIELD_NUMBER: builtins.int
+ TIMEZONE_FIELD_NUMBER: builtins.int
unit: global___TimeUnit.ValueType
+ @property
+ def timezone(self) -> global___Timezone: ...
def __init__(
self,
*,
unit: global___TimeUnit.ValueType = ...,
+ timezone: global___Timezone | None = ...,
) -> None: ...
- def ClearField(self, field_name: typing_extensions.Literal["unit", b"unit"]) -> None: ...
+ def HasField(self, field_name: typing_extensions.Literal["timezone", b"timezone"]) -> builtins.bool: ...
+ def ClearField(self, field_name: typing_extensions.Literal["timezone", b"timezone", "unit", b"unit"]) -> None: ...
global___Part = Part
diff --git a/pyproject.toml b/pyproject.toml
index b40baedc8..c1fe29608 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = "^3.9"
-pandas = {extras = ["performance"], version = "^2.2.2"}
+pandas = { extras = ["performance"], version = "^2.2.2" }
protobuf = "^4.22.4"
frozendict = "^2.3.8"
numpy = [
@@ -20,7 +20,7 @@ pytest = "7.1.3"
pytest-rerunfailures = "^13.0"
sortedcontainers = "^2.4.0"
typing-extensions = "^4.12.0"
-fennel-data-lib = "0.1.18"
+fennel-data-lib = "0.1.20"
pyarrow = "^14.0.2"
[tool.poetry.dev-dependencies]