-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
expr/docs: document all list functions
- Loading branch information
1 parent
3dd15f1
commit 3ad5732
Showing
12 changed files
with
364 additions
and
76 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -244,6 +244,7 @@ hackathon | |
hardcoded | ||
hashable | ||
hashmap | ||
hasnull | ||
hostname | ||
html | ||
hudi | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
import pytest | ||
from typing import Optional, List | ||
import pandas as pd | ||
|
||
|
||
def test_len():
    """Doc snippet for `list.len()`: type inference, evaluation, bad schema."""
    # docsnip len
    from fennel.expr import col

    # docsnip-highlight next-line
    expr = col("x").list.len()

    # len works for any list type or optional list type
    plain_type = expr.typeof(schema={"x": List[int]})
    assert plain_type == int
    nullable_type = expr.typeof(schema={"x": Optional[List[float]]})
    assert nullable_type == Optional[int]

    # can be evaluated with a dataframe
    df = pd.DataFrame({"x": [[1, 2, 3], [4, 5], [], None]})
    lengths = expr.eval(df, schema={"x": Optional[List[int]]})
    assert lengths.tolist() == [3, 2, 0, pd.NA]

    # schema of column must be list of something
    with pytest.raises(ValueError):
        expr.typeof(schema={"x": int})
    # /docsnip
|
||
|
||
def test_has_null():
    """Doc snippet for `list.hasnull()`: type inference, evaluation, bad schema.

    The comments between the `docsnip` markers are rendered in the public
    docs, so they must describe `hasnull` (not `len`, from which this
    snippet was originally copied).
    """
    # docsnip has_null
    from fennel.expr import col

    # docsnip-highlight next-line
    expr = col("x").list.hasnull()

    # hasnull works for any list type or optional list type
    assert expr.typeof(schema={"x": List[int]}) == bool
    assert expr.typeof(schema={"x": Optional[List[float]]}) == Optional[bool]

    # can be evaluated with a dataframe
    df = pd.DataFrame({"x": [[1, 2, 3], [4, 5, None], [], None]})
    schema = {"x": Optional[List[Optional[int]]]}
    assert expr.eval(df, schema=schema).tolist() == [False, True, False, pd.NA]

    # schema of column must be list of something
    with pytest.raises(ValueError):
        expr.typeof(schema={"x": int})
    # /docsnip
|
||
|
||
def test_contains():
    """Doc snippet for `list.contains(item)`: typing rules and null semantics."""
    # docsnip contains
    from fennel.expr import col

    # docsnip-highlight next-line
    expr = col("x").list.contains(col("y"))

    # contains works for only list types
    assert expr.typeof(schema={"x": List[int], "y": int}) == bool
    nullable_type = expr.typeof(schema={"x": Optional[List[float]], "y": float})
    assert nullable_type == Optional[bool]

    # however doesn't work if item is not of the same type as the list elements
    with pytest.raises(ValueError):
        expr.typeof(schema={"x": List[int], "y": str})

    # can be evaluated with a dataframe
    lists = [[1, 2, 3], [4, 5, None], [4, 5, None], None, []]
    items = [1, 5, 3, 4, None]
    df = pd.DataFrame({"x": lists, "y": items})
    schema = {"x": Optional[List[Optional[int]]], "y": Optional[int]}
    found = expr.eval(df, schema=schema).tolist()
    assert found == [True, True, pd.NA, pd.NA, False]

    # schema of column must be list of something
    with pytest.raises(ValueError):
        expr.typeof(schema={"x": int})
    # /docsnip
|
||
|
||
def test_at():
    """Doc snippet for `list.at(index)`: typing rules, evaluation, bad schema.

    The comments between the `docsnip` markers are rendered in the public
    docs, so they must describe `at` (not `contains`, from which this
    snippet was originally copied).
    """
    # docsnip at
    from fennel.expr import col

    # docsnip-highlight next-line
    expr = col("x").list.at(col("y"))

    # at works for only list types, index can be int/optional[int]
    assert expr.typeof(schema={"x": List[int], "y": int}) == Optional[int]
    assert expr.typeof(schema={"x": List[str], "y": int}) == Optional[str]

    # however doesn't work if the index is not an integer, e.g. a float
    schema = {"x": Optional[List[float]], "y": float}
    with pytest.raises(Exception):
        expr.typeof(schema=schema)

    # can be evaluated with a dataframe
    df = pd.DataFrame(
        {
            "x": [[1, 2, 3], [4, 5, None], [4, 5, None], None],
            "y": [1, 5, 0, 4],
        }
    )
    schema = {"x": Optional[List[Optional[int]]], "y": int}
    assert expr.eval(df, schema=schema).tolist() == [2, pd.NA, 4, pd.NA]

    # schema of column must be list of something
    with pytest.raises(ValueError):
        expr.typeof(schema={"x": int})
    # /docsnip
|
||
|
||
def test_at_negative():
    """Doc snippet for `list.at(index)` with negative (reverse) indices."""
    # docsnip at_negative
    from fennel.expr import col

    # docsnip-highlight next-line
    expr = col("x").list.at(col("y"))

    # negative indices until -len(list) are allowed and do reverse indexing
    # beyond that, start returning None like other out-of-bounds indices
    lists = [[1, 2, 3], [4, 5, None], [4, 5, None], None]
    indices = [-1, -5, -2, -4]
    df = pd.DataFrame({"x": lists, "y": indices})
    schema = {"x": Optional[List[Optional[int]]], "y": int}
    picked = expr.eval(df, schema=schema).tolist()
    assert picked == [3, pd.NA, 5, pd.NA]
    # /docsnip
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
--- | ||
title: At | ||
order: 0 | ||
status: published | ||
--- | ||
|
||
### At | ||
|
||
Function to get the value of the element at a given index of the list. | ||
|
||
#### Parameters | ||
<Expandable title="index" type="Expr"> | ||
The index of the list element to return. This expression is expected | |
to evaluate to an int. Fennel supports indexing by negative integers as well. | ||
</Expandable> | ||
|
||
<pre snippet="api-reference/expressions/list#at" | ||
status="success" message="Getting the value of a list's element at given index"> | ||
</pre> | ||
|
||
<pre snippet="api-reference/expressions/list#at_negative" | ||
status="success" message="Also works with negative indices"> | ||
</pre> | ||
|
||
|
||
#### Returns | ||
<Expandable type="Expr"> | ||
Returns an expression object denoting the value of the list at the given index. | ||
If the index is out of bounds for the list's length, `None` is returned. Consequently, | |
for a list of elements of type `T`, `at` always returns `Optional[T]`. | ||
|
||
Fennel also supports negative indices: -1 maps to the last element of the list, | ||
-2 to the second last element of the list and so on. Negative indices smaller | |
than `-len(list)` return `None`, like other out-of-bounds indices. | |
</Expandable> | ||
|
||
|
||
#### Errors | ||
<Expandable title="Use of invalid types"> | ||
The `list` namespace must be invoked on an expression that evaluates to list | ||
or optional of list. Similarly, `index` must evaluate to an element of type `int` | ||
or `Optional[int]`. | ||
</Expandable> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
--- | ||
title: Contains | ||
order: 0 | ||
status: published | ||
--- | ||
|
||
### Contains | ||
|
||
Function to check if the given list contains a given element. | ||
|
||
#### Parameters | ||
<Expandable title="item" type="Expr"> | ||
The item to look for: `contains` checks whether the base list contains `item`. | |
</Expandable> | ||
|
||
<pre snippet="api-reference/expressions/list#contains" | ||
status="success" message="Checking if a list contains a given item"> | ||
</pre> | ||
|
||
|
||
#### Returns | ||
<Expandable type="Expr"> | ||
Returns an expression object denoting the result of the `contains` expression. | ||
The resulting expression is of type `bool` or `Optional[bool]` depending on | |
whether either the input list or the item is nullable. | |
|
||
Note that Fennel expressions borrow semantics from SQL and treat `None` as | |
an unknown value. As a result, the following rules apply to `contains` in | ||
presence of nulls: | ||
- If the base list itself is `None`, the result is `None` regardless of the item. | ||
- If the item is `None`, the result is `None` regardless of the list, unless it | ||
is empty, in which case, the answer is `False` (after all, if the list is empty, | ||
no matter the value of the item, it's not present in the list). | ||
- If the item is not `None` and is present in the list, the answer is obviously | ||
`True` | ||
- However, if the item is not `None`, is not present in the list but the list | ||
has some `None` element, the result is still `None` (because the `None` values | ||
in the list may have been that element - we just can't say) | ||
|
||
This is somewhat (but not exactly) similar to Spark's `array_contains` [function](https://docs.databricks.com/en/sql/language-manual/functions/array_contains.html). | ||
</Expandable> | ||
:::info | ||
If you are interested in checking if a list has any `None` elements, a better | ||
way of doing that is to use [hasnull](/api-reference/expressions/list/hasnull). | ||
::: | ||
|
||
|
||
#### Errors | ||
<Expandable title="Use of invalid types"> | ||
The `list` namespace must be invoked on an expression that evaluates to list | ||
or optional of list. Similarly, `item` must evaluate to an element of type `T` | |
or `Optional[T]` if the list itself was of type `List[T]` (or `Optional[List[T]]`). | |
</Expandable> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
--- | ||
title: Has Null | ||
order: 0 | ||
status: published | ||
--- | ||
|
||
### Has Null | ||
|
||
Function to check if the given list has any `None` values. | ||
|
||
<pre snippet="api-reference/expressions/list#has_null" | ||
status="success" message="Checking if a list has any null values"> | ||
</pre> | ||
|
||
#### Returns | ||
<Expandable type="Expr"> | ||
Returns an expression object denoting the result of the `hasnull` function. | ||
The resulting expression is of type `bool` or `Optional[bool]` depending on | ||
the input being nullable. | ||
</Expandable> | ||
|
||
|
||
#### Errors | ||
<Expandable title="Use of invalid types"> | ||
The `list` namespace must be invoked on an expression that evaluates to list | ||
or optional of list. | ||
</Expandable> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
--- | ||
title: Len | ||
order: 0 | ||
status: published | ||
--- | ||
|
||
### Len | ||
|
||
Function to get the length of a list. | ||
|
||
<pre snippet="api-reference/expressions/list#len" | ||
status="success" message="Getting the length of a list"> | ||
</pre> | ||
|
||
#### Returns | ||
<Expandable type="Expr"> | ||
Returns an expression object denoting the result of the `len` function. | ||
The resulting expression is of type `int` or `Optional[int]` depending on | ||
the input being nullable. | ||
</Expandable> | ||
|
||
|
||
#### Errors | ||
<Expandable title="Use of invalid types"> | ||
The `list` namespace must be invoked on an expression that evaluates to list | ||
or optional of list. | ||
</Expandable> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.