Skip to content

Commit

Permalink
expressions: add zip, repeat, sqrt, log, pow along with docs
Browse files Browse the repository at this point in the history
  • Loading branch information
nikhilgarg28 committed Dec 25, 2024
1 parent cf00519 commit 3e80c45
Show file tree
Hide file tree
Showing 24 changed files with 1,193 additions and 215 deletions.
3 changes: 3 additions & 0 deletions .wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ DescribeStreamSummary
disambiguity
Dockerfile
expr
exponentiate
Eval
eval
endswith
Expand Down Expand Up @@ -127,6 +128,8 @@ Secret
ShardIteratorType
Signifier
SnapshotData
sqrt
Sqrt
Stddev
Subprocessor
SubscribeToShard
Expand Down
5 changes: 5 additions & 0 deletions docs/api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,10 @@ sidebar:
- "api-reference/expressions/lit"
- "api-reference/expressions/not"
- "api-reference/expressions/now"
- "api-reference/expressions/repeat"
- "api-reference/expressions/typeof"
- "api-reference/expressions/when"
- "api-reference/expressions/zip"

- slug: "api-reference/expressions/dt"
title: "Datetime Expressions"
Expand Down Expand Up @@ -146,7 +148,10 @@ sidebar:
- "api-reference/expressions/num/abs"
- "api-reference/expressions/num/ceil"
- "api-reference/expressions/num/floor"
- "api-reference/expressions/num/log"
- "api-reference/expressions/num/pow"
- "api-reference/expressions/num/round"
- "api-reference/expressions/num/sqrt"
- "api-reference/expressions/num/to_string"

- slug: "api-reference/expressions/str"
Expand Down
54 changes: 52 additions & 2 deletions docs/examples/api-reference/expressions/basic.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import datetime

import pytest
from typing import Optional
from typing import Optional, List
import pandas as pd


Expand Down Expand Up @@ -176,8 +176,58 @@ def test_now():
{"birthdate": [datetime(1997, 12, 24), datetime(2001, 1, 21), None]}
)
assert expr.eval(df, schema={"birthdate": Optional[datetime]}).tolist() == [
26,
27,
23,
pd.NA,
]
# /docsnip


def test_repeat():
    """Doc example: `repeat` turns a value and a per-row count into a list."""
    # docsnip repeat
    from fennel.expr import repeat, col

    # docsnip-highlight next-line
    expr = repeat(col("x"), col("y"))

    # repeating a bool column yields a list of bools
    schema = {"x": bool, "y": int}
    assert expr.typeof(schema=schema) == List[bool]

    # can be evaluated with a dataframe - each row uses its own count from `y`
    df = pd.DataFrame({"x": [True, False, True], "y": [1, 2, 3]})
    repeated = expr.eval(df, schema=schema).tolist()
    assert repeated == [
        [True],
        [False, False],
        [True, True, True],
    ]
    # /docsnip


def test_zip():
    """Doc example: `Struct.zip` combines list columns into a list of structs."""
    # docsnip zip
    from fennel.lib.schema import struct
    from fennel.expr import col

    @struct
    class MyStruct:
        a: int
        b: float

    # docsnip-highlight next-line
    expr = MyStruct.zip(a=col("x"), b=col("y"))

    # one schema is shared by the type check and the evaluation below
    schema = {"x": List[int], "y": List[float]}
    expected = List[MyStruct]
    assert expr.matches_type(expected, schema)

    # note that output is truncated to the length of the shortest list
    df = pd.DataFrame(
        {"x": [[1, 2], [3, 4], []], "y": [[1.0, 2.0], [3.0], [4.0]]}
    )
    zipped = expr.eval(df, schema=schema).tolist()
    assert zipped == [
        [MyStruct(a=1, b=1.0), MyStruct(a=2, b=2.0)],
        [MyStruct(a=3, b=3.0)],
        [],
    ]
    # /docsnip
77 changes: 77 additions & 0 deletions docs/examples/api-reference/expressions/num.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest
import numpy as np
from typing import Optional
import pandas as pd

Expand Down Expand Up @@ -146,3 +147,79 @@ def test_to_string():
pd.NA,
]
# /docsnip


def test_sqrt():
    """Doc example: `num.sqrt` returns floats and keeps input optionality."""
    # docsnip sqrt
    from fennel.expr import col

    # docsnip-highlight next-line
    expr = col("x").num.sqrt()

    # (input type, expected output type): the result is always float-based
    # and is Optional exactly when the input is Optional
    type_cases = [
        (int, float),
        (Optional[int], Optional[float]),
        (float, float),
        (Optional[float], Optional[float]),
    ]
    for in_type, out_type in type_cases:
        assert expr.typeof(schema={"x": in_type}) == out_type

    df = pd.DataFrame({"x": pd.Series([1.1, -2.3, 4.0])})
    roots = expr.eval(df, schema={"x": Optional[float]}).tolist()
    assert roots == [
        1.0488088481701516,
        pd.NA,  # this is nan in pandas, sqrt of negative number
        2.0,
    ]
    # /docsnip


def test_log():
    """Doc example: `num.log` with an explicit base returns floats."""
    # docsnip log
    from fennel.expr import col

    # docsnip-highlight next-line
    expr = col("x").num.log(base=2.0)

    # (input type, expected output type): the result is always float-based
    # and is Optional exactly when the input is Optional
    type_cases = [
        (int, float),
        (Optional[int], Optional[float]),
        (float, float),
        (Optional[float], Optional[float]),
    ]
    for in_type, out_type in type_cases:
        assert expr.typeof(schema={"x": in_type}) == out_type

    df = pd.DataFrame({"x": pd.Series([1.1, -2.3, 4.0])})
    logs = expr.eval(df, schema={"x": Optional[float]}).tolist()
    assert logs == [
        0.13750352374993502,
        pd.NA,  # nan in pandas, log of negative number
        2.0,
    ]
    # /docsnip


def test_pow():
    """Doc example: `num.pow` typing, evaluation and the negative-exponent error."""
    # docsnip pow
    from fennel.expr import col, lit

    # docsnip-highlight next-line
    expr = col("x").num.pow(lit(2))

    # with an int exponent, the result type is the same as the base's type
    for base_type in (int, Optional[int], float, Optional[float]):
        assert expr.typeof(schema={"x": base_type}) == base_type

    df = pd.DataFrame({"x": pd.Series([1, 2, 4])})
    squares = expr.eval(df, schema={"x": int}).tolist()
    assert squares == [1, 4, 16]

    # negative integer exponent raises error if base is also an integer
    with pytest.raises(Exception):
        expr = lit(2).num.pow(lit(-2))
        expr.eval(df, schema={"x": int})

    # but works if either base or exponent is a float
    expr = lit(2).num.pow(lit(-2.0))
    inverse_squares = expr.eval(df, schema={"x": int}).tolist()
    assert inverse_squares == [0.25, 0.25, 0.25]
    # /docsnip
34 changes: 34 additions & 0 deletions docs/pages/api-reference/expressions/num/log.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
---
title: Log
order: 0
status: published
---

### Log

Function in `num` namespace to get the logarithm of a number.

#### Parameters
<Expandable title="base" type="float" defaultVal="2.718281828459045">
The base of the logarithm. By default, the base is set to `e` (Euler's number).
</Expandable>

#### Returns
<Expandable type="Expr">
Returns an expression object denoting the logarithm of the input data. The
data type of the resulting expression is `float` if the input was `int` or
`float` and `Optional[float]` if the input was `Optional[int]` or
`Optional[float]`.

For negative numbers, the result is `NaN` (Not a Number).
</Expandable>

<pre snippet="api-reference/expressions/num#log"
status="success" message="Computing logarithm of a number">
</pre>

#### Errors
<Expandable title="Invoking on a non-numeric type">
Error during `typeof` or `eval` if the input expression is not of type int,
float, optional int or optional float.
</Expandable>
45 changes: 45 additions & 0 deletions docs/pages/api-reference/expressions/num/pow.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
---
title: Pow
order: 0
status: published
---

### Pow

Function in `num` namespace to exponentiate a number.

#### Parameters
<Expandable title="exponent" type="Expr">
The exponent to which the base is raised - expected to be a numeric expression.
</Expandable>

#### Returns
<Expandable type="Expr">
Returns an expression object denoting the result of the exponentiation.

The base data type of the resulting expression is `int` if both the base and
exponent are `int`, otherwise it is `float`.

If either the base or the exponent is `Optional`, the resulting expression is
an `Optional` of that same data type.
</Expandable>

<pre snippet="api-reference/expressions/num#pow" status="success"
message="Exponentiating a number">
</pre>

#### Errors

<Expandable title="Invoking on a non-numeric type">
Error during `typeof` or `eval` if the input expression is not of type int,
float, optional int or optional float.
</Expandable>


<Expandable title="Negative integer exponent with an integer base">
A runtime error will be raised if the exponent is a negative integer and the
base is also an integer.

In such cases, it's advised to convert either the base or the exponent to be a
float.
</Expandable>
31 changes: 31 additions & 0 deletions docs/pages/api-reference/expressions/num/sqrt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
---
title: Sqrt
order: 0
status: published
---

### Sqrt

Function in `num` namespace to get the square root of a number.

#### Returns
<Expandable type="Expr">
Returns an expression object denoting the square root of the input data.

The data type of the resulting expression is `float` if the input is `int` or
`float` and `Optional[float]` if the input is `Optional[int]` or `Optional[float]`.
</Expandable>

:::info
The square root of a negative number is represented as `NaN` in the output.
:::

<pre snippet="api-reference/expressions/num#sqrt"
status="success" message="Getting square root of a number">
</pre>

#### Errors
<Expandable title="Invoking on a non-numeric type">
Error during `typeof` or `eval` if the input expression is not of type int,
float, optional int or optional float.
</Expandable>
39 changes: 39 additions & 0 deletions docs/pages/api-reference/expressions/repeat.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
---
title: Repeat
order: 0
status: published
---
### Repeat

Repeat an expression `n` times to create a list.

#### Parameters
<Expandable title="value" type="Expr">
The expression to repeat.
</Expandable>

<Expandable title="by" type="Expr">
The number of times to repeat the value - can evaluate to a different count for
each row.
</Expandable>


<pre snippet="api-reference/expressions/basic#repeat"
status="success" message="Repeating booleans to create list">
</pre>

#### Returns
<Expandable type="Expr">
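Returns an expression object denoting the result of the repeat expression. The
data type of the resulting expression is a list of the type of `value`, e.g.
`List[bool]` when `value` is of type `bool`.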
</Expandable>


#### Errors
<Expandable title="Invalid input types">
An error is thrown if the `by` expression is not of type int.
In addition, certain types (e.g. lists) are not supported as input for `value`.
</Expandable>

<Expandable title="Negative count">
An error is thrown if the `by` expression evaluates to a negative integer.
</Expandable>
48 changes: 48 additions & 0 deletions docs/pages/api-reference/expressions/zip.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
---
title: Zip
order: 0
status: published
---
### Zip

Zip two or more lists into a list of structs.

#### Parameters
<Expandable title="struct" type="Struct">
The struct to hold the zipped values. Unlike other top level expressions,
`zip` is written as `Struct.zip(kwarg1=expr1, kwarg2=expr2, ...)`.
</Expandable>

<Expandable title="kwargs" type="Dict[str, Expr]">
A dictionary of key-value pairs where the key is the name of the field in the
struct and the value is the expression to zip.

Expressions are expected to evaluate to lists of a type that can be converted to
the corresponding field type in the struct.
</Expandable>


<pre snippet="api-reference/expressions/basic#zip" status="success"
message="Zipping two lists into a list of structs">
</pre>

#### Returns
<Expandable type="Expr">
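Returns an expression object denoting the result of the zip expression. The
data type of the resulting expression is a list of the struct on which `zip`
was invoked, e.g. `List[MyStruct]` for `MyStruct.zip(...)`.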
</Expandable>

:::info
When zipping lists of unequal length, similar to Python's zip function, the
resulting list will be truncated to the length of the shortest list, possibly
zero.
:::

#### Errors
<Expandable title="Mismatching types">
An error is thrown if the types of the lists to zip are not compatible with the
field types in the struct.
</Expandable>

<Expandable title="Non-list types">
An error is thrown if the expressions to zip don't evaluate to lists.
</Expandable>
Loading

0 comments on commit 3e80c45

Please sign in to comment.