Skip to content

Commit

Permalink
docs: start documenting expressions
Browse files Browse the repository at this point in the history
  • Loading branch information
nikhilgarg28 authored and aditya-nambiar committed Sep 4, 2024
1 parent e660b1b commit 82554d0
Show file tree
Hide file tree
Showing 29 changed files with 1,062 additions and 13 deletions.
49 changes: 49 additions & 0 deletions docs/api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,55 @@ sidebar:
- "api-reference/aggregations/quantile"
- "api-reference/aggregations/exponential-decay-sum"

- slug: "api-reference/expressions"
title: "Expressions"
pages:
- "api-reference/expressions/binary"
- "api-reference/expressions/col"
- "api-reference/expressions/eval"
- "api-reference/expressions/isnull"
- "api-reference/expressions/fillnull"
- "api-reference/expressions/lit"
- "api-reference/expressions/not"
- "api-reference/expressions/typeof"
- "api-reference/expressions/when"
# - "api-reference/expressions/datetime"
# - "api-reference/expressions/str/lower"
# - "api-reference/expressions/str/upper"
# - "api-reference/expressions/str/endswith"
# - "api-reference/expressions/str/concat"
# - "api-reference/expressions/str/parse"
# - "api-reference/expressions/str/len"
# - "api-reference/expressions/str/contains"
# - "api-reference/expressions/list.len"
# - "api-reference/expressions/list.hasnull"
# - "api-reference/expressions/list.contains"
# - "api-reference/expressions/dt.since"
# - "api-reference/expressions/dt.since_epoch"
# - "api-reference/expressions/dt.year"
# - "api-reference/expressions/dt.month"
# - "api-reference/expressions/dt.day"
# - "api-reference/expressions/dt.hour"
# - "api-reference/expressions/dt.minute"
# - "api-reference/expressions/dt.second"
# - "api-reference/expressions/dt.strftime"
# - "api-reference/expressions/struct"
# - "api-reference/expressions/from_epoch"
# - "api-reference/expressions/struct.get"

- slug: "api-reference/expressions/str"
title: "String Expressions"
pages:
- "api-reference/expressions/str/startswith"

- slug: "api-reference/expressions/num"
title: "Num Expressions"
pages:
- "api-reference/expressions/num/abs"
- "api-reference/expressions/num/ceil"
- "api-reference/expressions/num/floor"
- "api-reference/expressions/num/round"

- slug: "api-reference/decorators"
title: "Decorators"
pages:
Expand Down
157 changes: 157 additions & 0 deletions docs/examples/api-reference/expressions/basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import pytest
from typing import Optional
import pandas as pd

def test_num_abs():
    """Doc example: absolute value through the explicit ``num`` namespace."""
    # docsnip expr_num_abs
    from fennel.expr import col

    # docsnip-highlight next-line
    expr = col("x").num.abs()

    # abs keeps the (numeric) type of its input, optional or not
    for dtype in (int, Optional[int], float, Optional[float]):
        assert expr.typeof(schema={"x": dtype}) == dtype

    # can be evaluated with a dataframe
    values = pd.Series([1, -2, None], dtype=pd.Int64Dtype())
    frame = pd.DataFrame({"x": values})
    result = expr.eval(frame, schema={"x": Optional[int]})
    assert result.tolist() == [1, 2, None]

    # non-numeric input is rejected at type-check time
    with pytest.raises(ValueError):
        expr.typeof(schema={"x": str})
    # /docsnip

def test_unary_not():
    """Doc example: negating a boolean expression with the ``~`` operator."""
    # docsnip expr_unary_not
    from fennel.expr import lit

    # docsnip-highlight next-line
    expr = ~lit(True)
    assert expr.typeof() == bool

    # can be evaluated with a dataframe: one negated value per row
    frame = pd.DataFrame({"x": [1, 2, 3]})
    negated = expr.eval(frame, schema={"x": int})
    assert negated.tolist() == [False, False, False]
    # /docsnip

def test_col():
    """Doc example: referencing dataframe columns with ``col``."""
    # docsnip expr_col
    from fennel.expr import col

    # docsnip-highlight next-line
    expr = col("x") + col("y")

    # the type of col("x") + col("y") follows the types of 'x' and 'y'
    assert expr.typeof(schema={"x": int, "y": float}) == float

    # columns the expression does not use may also appear in the schema
    wide_schema = {"x": int, "y": float, "z": str}
    assert expr.typeof(schema=wide_schema) == float

    # raises an error whenever the schema is missing 'x' or 'y'
    for incomplete in ({}, {"x": int}, {"z": int, "y": float}):
        with pytest.raises(ValueError):
            expr.typeof(schema=incomplete)

    # can be evaluated with a dataframe
    import pandas as pd

    frame = pd.DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]})
    summed = expr.eval(frame, schema={"x": int, "y": float})
    assert summed.tolist() == [2.0, 4.0, 6.0]
    # /docsnip

def test_when_then():
    """Doc example: conditionals built from ``when`` / ``then`` / ``otherwise``."""
    # docsnip expr_when_then
    from fennel.expr import when, col

    # docsnip-highlight next-line
    expr = when(col("x")).then(1).otherwise(0)

    # the result type comes from the then/otherwise values
    assert expr.typeof(schema={"x": bool}) == int

    # raises an error if 'x' is missing from the schema,
    # and also when the predicate is not boolean
    for bad_schema in ({}, {"x": int}):
        with pytest.raises(ValueError):
            expr.typeof(schema=bad_schema)

    # can be evaluated with a dataframe
    import pandas as pd

    frame = pd.DataFrame({"x": [True, False, True]})
    picked = expr.eval(frame, schema={"x": bool})
    assert picked.tolist() == [1, 0, 1]

    # a bare `when` without `then` is not a valid expression
    only_when = when(col("x"))
    with pytest.raises(ValueError):
        only_when.typeof(schema={"x": bool})

    # without `otherwise` the fallback is None, so the type becomes optional
    no_otherwise = when(col("x")).then(1)
    assert no_otherwise.typeof(schema={"x": bool}) == Optional[int]
    # /docsnip

def test_isnull():
    """Doc example: checking for nulls with ``isnull``.

    The evaluated result marks exactly the null rows, so a column holding
    ``[1, 2, None]`` yields ``[False, False, True]``.
    """
    # docsnip expr_isnull
    from fennel.expr import col

    # docsnip-highlight next-line
    expr = col("x").isnull()

    # type of isnull is always boolean
    assert expr.typeof(schema={"x": Optional[int]}) == bool

    # also works for non-optional types, where it's always False
    assert expr.typeof(schema={"x": float}) == bool

    # raises an error if the schema does not describe column 'x'
    with pytest.raises(ValueError):
        expr.typeof(schema={})

    # can be evaluated with a dataframe
    import pandas as pd

    df = pd.DataFrame({"x": pd.Series([1, 2, None], dtype=pd.Int64Dtype())})
    # only the last row is null, so only the last entry is True
    assert expr.eval(df, schema={"x": Optional[int]}).tolist() == [
        False,
        False,
        True,
    ]
    # /docsnip

def test_fillnull():
    """Doc example: replacing nulls with a default value via ``fillnull``."""
    # docsnip expr_fillnull
    from fennel.expr import col, lit

    # docsnip-highlight next-line
    expr = col("x").fillnull(lit(10))

    # type of fillnull depends both on type of 'x' and the literal 10
    assert expr.typeof(schema={"x": Optional[int]}) == int
    assert expr.typeof(schema={"x": float}) == float

    # raises an error if the schema does not describe column 'x'
    with pytest.raises(ValueError):
        expr.typeof(schema={})

    # can be evaluated with a dataframe; the null row is replaced by 10
    import pandas as pd

    df = pd.DataFrame({"x": pd.Series([1, 2, None], dtype=pd.Int64Dtype())})
    assert expr.eval(df, schema={"x": Optional[float]}).tolist() == [
        1.0,
        2.0,
        10.0,
    ]
    # /docsnip

def test_lit():
    """Doc example: constant expressions created with ``lit``."""
    # docsnip expr_lit
    from fennel.expr import lit, col

    # docsnip-highlight next-line
    expr = lit(1)

    # a literal can be type-checked without any schema
    assert expr.typeof() == int

    # can be evaluated with a dataframe
    shifted = col("x") + lit(1)
    values = pd.Series([1, 2, None], dtype=pd.Int64Dtype())
    frame = pd.DataFrame({"x": values})
    result = shifted.eval(frame, schema={"x": Optional[int]})
    assert result.tolist() == [2, 3, None]
    # /docsnip
27 changes: 27 additions & 0 deletions docs/examples/api-reference/expressions/binary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import Optional
import pytest

def test_typeof():
    """Doc example: arithmetic binary operators (+, -, *, /) on expressions."""
    # docsnip expr_binary_arithmetic
    import pandas as pd
    from fennel.expr import lit, col

    # result type of an addition for several operand type combinations
    expr = col("x") + col("y")
    assert expr.typeof(schema={"x": int, "y": int}) == int
    assert expr.typeof(schema={"x": int, "y": float}) == float
    assert expr.typeof(schema={"x": float, "y": float}) == float
    mixed = {"x": Optional[float], "y": int}
    assert expr.typeof(schema=mixed) == Optional[float]

    # every operator below is evaluated against the same frame and schema
    frame = pd.DataFrame({"x": [1, 2, None]})
    schema = {"x": Optional[int]}

    total = lit(1) + col("x")
    assert total.eval(frame, schema=schema).tolist() == [2, 3, None]

    difference = lit(1) - col("x")
    assert difference.eval(frame, schema=schema).tolist() == [0, -1, None]

    product = lit(1) * col("x")
    assert product.eval(frame, schema=schema).tolist() == [1, 2, None]

    quotient = lit(1) / col("x")
    assert quotient.eval(frame, schema=schema).tolist() == [1, 0.5, None]
    # /docsnip
52 changes: 52 additions & 0 deletions docs/examples/api-reference/expressions/eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from typing import Optional
import pytest

def test_typeof():
    """Doc example: inferring the type of an expression with ``typeof``."""
    # docsnip expr_typeof
    from fennel.expr import lit, col

    expr = lit(1) + col("amount")
    # type of 1 + col('amount') mirrors the type of 'amount'
    for dtype in (int, float, Optional[int], Optional[float]):
        assert expr.typeof(schema={"amount": dtype}) == dtype

    # typeof raises an error if type of 'amount' isn't provided
    with pytest.raises(ValueError):
        expr.typeof()

    # or when the expression won't be valid with the schema
    with pytest.raises(ValueError):
        expr.typeof(schema={"amount": str})

    # no need to provide schema if the expression is constant
    constant = lit(1)
    assert constant.typeof() == int
    # /docsnip


def test_eval():
    """Doc example: evaluating an expression against a dataframe."""
    # docsnip expr_eval
    import pandas as pd
    from fennel.expr import lit, col

    expr = lit(1) + col("amount")
    # the evaluated values follow the type of 'amount'
    int_frame = pd.DataFrame({"amount": [1, 2, 3]})
    int_result = expr.eval(int_frame, schema={"amount": int})
    assert int_result.tolist() == [2, 3, 4]

    float_frame = pd.DataFrame({"amount": [1.0, 2.0, 3.0]})
    float_result = expr.eval(float_frame, schema={"amount": float})
    assert float_result.tolist() == [2.0, 3.0, 4.0]

    # raises an error if the schema is not provided
    with pytest.raises(TypeError):
        expr.eval(float_frame)

    # a schema alone is not enough: the dataframe must hold the column too
    unrelated = pd.DataFrame({"other": [1, 2, 3]})
    with pytest.raises(KeyError):
        expr.eval(unrelated, schema={"amount": int})

    # /docsnip
63 changes: 63 additions & 0 deletions docs/examples/api-reference/expressions/num.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import pytest
from typing import Optional
import pandas as pd

def test_abs():
    """Doc example: absolute value of a numeric expression."""
    # docsnip abs
    from fennel.expr import col

    # docsnip-highlight next-line
    expr = col("x").abs()  # equivalent to col("x").num.abs()

    # abs keeps the (numeric) type of its input, optional or not
    for dtype in (int, Optional[int], float, Optional[float]):
        assert expr.typeof(schema={"x": dtype}) == dtype

    # can be evaluated with a dataframe
    values = pd.Series([1, -2, pd.NA], dtype=pd.Int64Dtype())
    frame = pd.DataFrame({"x": values})
    result = expr.eval(frame, schema={"x": Optional[int]})
    assert result.tolist() == [1, 2, pd.NA]

    # non-numeric input is rejected at type-check time
    with pytest.raises(ValueError):
        expr.typeof(schema={"x": str})
    # /docsnip


def test_floor():
    """Doc example: rounding a numeric expression down with ``floor``."""
    # docsnip floor
    from fennel.expr import col

    # docsnip-highlight next-line
    expr = col("x").floor()  # equivalent to col("x").num.floor()

    # the result is always an integer; optionality of the input is kept
    expected_types = (
        (int, int),
        (Optional[int], Optional[int]),
        (float, int),
        (Optional[float], Optional[int]),
    )
    for input_type, output_type in expected_types:
        assert expr.typeof(schema={"x": input_type}) == output_type

    # can be evaluated with a dataframe
    frame = pd.DataFrame({"x": pd.Series([1.1, -2.3, None])})
    floored = expr.eval(frame, schema={"x": Optional[float]})
    assert floored.tolist() == [1, -3, pd.NA]

    # non-numeric input is rejected at type-check time
    with pytest.raises(ValueError):
        expr.typeof(schema={"x": str})
    # /docsnip


def test_ceil():
    """Doc example: rounding a numeric expression up with ``ceil``."""
    # docsnip ceil
    from fennel.expr import col

    # docsnip-highlight next-line
    expr = col("x").ceil()  # equivalent to col("x").num.ceil()

    # the result is always an integer; optionality of the input is kept
    expected_types = (
        (int, int),
        (Optional[int], Optional[int]),
        (float, int),
        (Optional[float], Optional[int]),
    )
    for input_type, output_type in expected_types:
        assert expr.typeof(schema={"x": input_type}) == output_type

    # can be evaluated with a dataframe
    frame = pd.DataFrame({"x": pd.Series([1.1, -2.3, None])})
    ceiled = expr.eval(frame, schema={"x": Optional[float]})
    assert ceiled.tolist() == [2, -2, pd.NA]

    # non-numeric input is rejected at type-check time
    with pytest.raises(ValueError):
        expr.typeof(schema={"x": str})
    # /docsnip
Loading

0 comments on commit 82554d0

Please sign in to comment.