Get tests passing

fennel-ai · Sep 4, 2024 · f86e418 · f86e418
1 parent 19a52b9
commit f86e418
Show file tree

Hide file tree

Showing 25 changed files with 616 additions and 494 deletions.
diff --git a/.wordlist.txt b/.wordlist.txt
@@ -2,14 +2,21 @@ ACS
 AES
 APIs
 ARNs
+aren
 AST
 AdministratorAccess
 AllowKinesisAccess
 AssumeRole
 Auth
 Avro
 BigTable
+Bitwise
 Bytewax
+Ceil
+ceil
+Concat
+const
+concat
 CDF
 CIDR
 DDL
@@ -22,7 +29,12 @@ Deltalake
 DescribeStream
 DescribeStreamConsumer
 DescribeStreamSummary
+disambiguity
 Dockerfile
+expr
+Eval
+eval
+endswith
 FennelDataAccessRole
 Flink
 Flink's
@@ -54,10 +66,13 @@ Kinesis
 Kubernetes
 LHS
 LastK
+len
+lits
 ListShards
 MSK
 MockClient
 Nones
+nullness
 OAuth
 OOM
 OWASP
@@ -69,10 +84,12 @@ OpenSSL's
 PII
 PLAINTEXT
 POC
+Polars
 PagerDuty
 Personalization
 PoolableConnectionFactory
 Preproc
+predictate
 PrivateLink
 Pulumi
 PyO
@@ -213,6 +230,7 @@ featureset
 featuresets
 fintech
 firstName
+fillnull
 forgotten
 frontend
 func
@@ -236,7 +254,9 @@ indirections
 init
 initializer
 interop
+ints
 ip
+isnull
 ish
 ith
 jdbc
@@ -262,6 +282,8 @@ mysql
 namespace
 nan
 nat
+NaT
+num
 natively
 noop
 noqa
@@ -280,6 +302,7 @@ personalization
 pid
 postgres
 pre
+precisions
 precompute
 precomputed
 prepend
@@ -317,19 +340,27 @@ snowflakecomputing
 src
 sso
 stateful
+startswith
 str
 strftime
+strptime
+Strptime
 struct
+structs
 sts
 subnets
 tiering
 timeframes
+Typeof
+typeof
+TODO
 uDDsketch
 ubuntu
 uid
 uids
 uint
 uints
+unary
 unbundled
 uncomment
 unhashable

diff --git a/docs/examples/api-reference/expressions/basic.py b/docs/examples/api-reference/expressions/basic.py
@@ -2,6 +2,7 @@
 from typing import Optional
 import pandas as pd
 
+
 def test_unary_not():
     # docsnip expr_unary_not
     from fennel.expr import lit
@@ -15,6 +16,7 @@ def test_unary_not():
     assert expr.eval(df, schema={"x": int}).tolist() == [False, False, False]
     # /docsnip
 
+
 def test_col():
     # docsnip expr_col
     from fennel.expr import col
@@ -38,10 +40,16 @@ def test_col():
 
     # can be evaluated with a dataframe
     import pandas as pd
+
     df = pd.DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]})
-    assert expr.eval(df, schema={"x": int, "y": float}).tolist() == [2.0, 4.0, 6.0]    
+    assert expr.eval(df, schema={"x": int, "y": float}).tolist() == [
+        2.0,
+        4.0,
+        6.0,
+    ]
     # /docsnip
 
+
 def test_when_then():
     # docsnip expr_when_then
     from fennel.expr import when, col, InvalidExprException
@@ -61,6 +69,7 @@ def test_when_then():
 
     # can be evaluated with a dataframe
     import pandas as pd
+
     df = pd.DataFrame({"x": [True, False, True]})
     assert expr.eval(df, schema={"x": bool}).tolist() == [1, 0, 1]
 
@@ -74,6 +83,7 @@ def test_when_then():
     assert expr.typeof(schema={"x": bool}) == Optional[int]
     # /docsnip
 
+
 def test_isnull():
     # docsnip expr_isnull
     from fennel.expr import col
@@ -95,9 +105,14 @@ def test_isnull():
     import pandas as pd
 
     df = pd.DataFrame({"x": pd.Series([1, 2, None], dtype=pd.Int64Dtype())})
-    assert expr.eval(df, schema={"x": Optional[int]}).tolist() == [False, False, True]
+    assert expr.eval(df, schema={"x": Optional[int]}).tolist() == [
+        False,
+        False,
+        True,
+    ]
     # /docsnip
 
+
 def test_fillnull():
     # docsnip expr_fillnull
     from fennel.expr import col, lit
@@ -118,9 +133,14 @@ def test_fillnull():
 
     expr = col("x").fillnull(lit(10))
     df = pd.DataFrame({"x": pd.Series([1, 2, None], dtype=pd.Int64Dtype())})
-    assert expr.eval(df, schema={"x": Optional[float]}).tolist() == [1., 2., 10.]
+    assert expr.eval(df, schema={"x": Optional[float]}).tolist() == [
+        1.0,
+        2.0,
+        10.0,
+    ]
     # /docsnip
 
+
 def test_lit():
     # docsnip expr_lit
     from fennel.expr import lit, col
@@ -135,4 +155,4 @@ def test_lit():
     expr = col("x") + lit(1)
     df = pd.DataFrame({"x": pd.Series([1, 2, None], dtype=pd.Int64Dtype())})
     assert expr.eval(df, schema={"x": Optional[int]}).tolist() == [2, 3, pd.NA]
-    # /docsnip
+    # /docsnip
diff --git a/docs/examples/api-reference/expressions/binary.py b/docs/examples/api-reference/expressions/binary.py
@@ -1,6 +1,7 @@
 from typing import Optional
 import pytest
 
+
 def test_typeof():
     # docsnip expr_binary_arithmetic
     import pandas as pd
@@ -10,18 +11,24 @@ def test_typeof():
     assert expr.typeof(schema={"x": int, "y": int}) == int
     assert expr.typeof(schema={"x": int, "y": float}) == float
     assert expr.typeof(schema={"x": float, "y": float}) == float
-    assert expr.typeof(schema={"x": Optional[float], "y": int}) == Optional[float]
+    assert (
+        expr.typeof(schema={"x": Optional[float], "y": int}) == Optional[float]
+    )
 
     df = pd.DataFrame({"x": [1, 2, None]})
     expr = lit(1) + col("x")
     assert expr.eval(df, schema={"x": Optional[int]}).tolist() == [2, 3, pd.NA]
-    
+
     expr = lit(1) - col("x")
     assert expr.eval(df, schema={"x": Optional[int]}).tolist() == [0, -1, pd.NA]
-    
+
     expr = lit(1) * col("x")
     assert expr.eval(df, schema={"x": Optional[int]}).tolist() == [1, 2, pd.NA]
-    
+
     expr = lit(1) / col("x")
-    assert expr.eval(df, schema={"x": Optional[int]}).tolist() == [1, 0.5, pd.NA]
+    assert expr.eval(df, schema={"x": Optional[int]}).tolist() == [
+        1,
+        0.5,
+        pd.NA,
+    ]
     # /docsnip
diff --git a/docs/examples/api-reference/expressions/eval.py b/docs/examples/api-reference/expressions/eval.py
@@ -1,6 +1,7 @@
 from typing import Optional
 import pytest
 
+
 def test_typeof():
     # docsnip expr_typeof
     from fennel.expr import lit, col
@@ -12,7 +13,6 @@ def test_typeof():
     assert expr.typeof(schema={"amount": Optional[int]}) == Optional[int]
     assert expr.typeof(schema={"amount": Optional[float]}) == Optional[float]
 
-
     # typeof raises an error if type of 'amount' isn't provided
     with pytest.raises(ValueError):
         expr.typeof()
@@ -48,5 +48,5 @@ def test_eval():
     df = pd.DataFrame({"other": [1, 2, 3]})
     with pytest.raises(Exception):
         expr.eval(df, schema={"amount": int})
-        
-    # /docsnip
+
+    # /docsnip
diff --git a/docs/examples/api-reference/expressions/num.py b/docs/examples/api-reference/expressions/num.py
@@ -2,6 +2,7 @@
 from typing import Optional
 import pandas as pd
 
+
 def test_abs():
     # docsnip abs
     from fennel.expr import col
@@ -28,15 +29,19 @@ def test_floor():
     from fennel.expr import col
 
     # docsnip-highlight next-line
-    expr = col("x").floor() # equivalent to col("x").num.floor()
+    expr = col("x").floor()  # equivalent to col("x").num.floor()
     assert expr.typeof(schema={"x": int}) == int
     assert expr.typeof(schema={"x": Optional[int]}) == Optional[int]
     assert expr.typeof(schema={"x": float}) == int
     assert expr.typeof(schema={"x": Optional[float]}) == Optional[int]
 
     # can be evaluated with a dataframe
     df = pd.DataFrame({"x": pd.Series([1.1, -2.3, None])})
-    assert expr.eval(df, schema={"x": Optional[float]}).tolist() == [1, -3, pd.NA]
+    assert expr.eval(df, schema={"x": Optional[float]}).tolist() == [
+        1,
+        -3,
+        pd.NA,
+    ]
 
     with pytest.raises(ValueError):
         expr.typeof(schema={"x": str})
@@ -48,15 +53,19 @@ def test_ceil():
     from fennel.expr import col
 
     # docsnip-highlight next-line
-    expr = col("x").ceil() # equivalent to col("x").num.ceil()
+    expr = col("x").ceil()  # equivalent to col("x").num.ceil()
     assert expr.typeof(schema={"x": int}) == int
     assert expr.typeof(schema={"x": Optional[int]}) == Optional[int]
     assert expr.typeof(schema={"x": float}) == int
     assert expr.typeof(schema={"x": Optional[float]}) == Optional[int]
 
     # can be evaluated with a dataframe
     df = pd.DataFrame({"x": pd.Series([1.1, -2.3, None])})
-    assert expr.eval(df, schema={"x": Optional[float]}).tolist() == [2, -2, pd.NA]
+    assert expr.eval(df, schema={"x": Optional[float]}).tolist() == [
+        2,
+        -2,
+        pd.NA,
+    ]
 
     with pytest.raises(ValueError):
         expr.typeof(schema={"x": str})
@@ -68,7 +77,7 @@ def test_round():
     from fennel.expr import col
 
     # docsnip-highlight next-line
-    expr = col("x").round() # equivalent to col("x").num.round()
+    expr = col("x").round()  # equivalent to col("x").num.round()
 
     assert expr.typeof(schema={"x": int}) == int
     assert expr.typeof(schema={"x": Optional[int]}) == Optional[int]
@@ -77,7 +86,11 @@ def test_round():
 
     # can be evaluated with a dataframe
     df = pd.DataFrame({"x": pd.Series([1.1, -2.3, None])})
-    assert expr.eval(df, schema={"x": Optional[float]}).tolist() == [1, -2, pd.NA]
+    assert expr.eval(df, schema={"x": Optional[float]}).tolist() == [
+        1,
+        -2,
+        pd.NA,
+    ]
 
     # can also explicitly specify the number of decimals
     # docsnip-highlight next-line
@@ -89,16 +102,24 @@ def test_round():
     assert expr.typeof(schema={"x": Optional[float]}) == Optional[float]
 
     df = pd.DataFrame({"x": pd.Series([1.12, -2.37, None])})
-    assert expr.eval(df, schema={"x": Optional[float]}).tolist() == [1.1, -2.4, pd.NA]
+    assert expr.eval(df, schema={"x": Optional[float]}).tolist() == [
+        1.1,
+        -2.4,
+        pd.NA,
+    ]
 
     df = pd.DataFrame({"x": pd.Series([1, -2, None])})
-    assert expr.eval(df, schema={"x": Optional[float]}).tolist() == [1.0, -2.0, pd.NA]
+    assert expr.eval(df, schema={"x": Optional[float]}).tolist() == [
+        1.0,
+        -2.0,
+        pd.NA,
+    ]
 
     # /docsnip
 
     # invalid number of decimals
     with pytest.raises(Exception):
         expr = col("x").round(-1)
-        
+
     with pytest.raises(Exception):
-        expr = col("x").round(1.1)
+        expr = col("x").round(1.1)