Skip to content

Commit

Permalink
[DEV-2054] added support for new extract and extract_historical functions (#332)
Browse files Browse the repository at this point in the history

* added support for new extract and extract_historical functions

* added support for new extract and extract_historical functions

* Updated the documents

* Updated the documents

* fixed auto lint changes

* fixed auto lint changes

* Removed Featureset as an input type from client function docstrings and parameter definitions

* Removed Featureset as an input type from client function docstrings and parameter definitions

* Pushed version

* pr changes

* ran black

* fixed request error

* Added warning about deprecation
  • Loading branch information
nonibansal authored Jan 15, 2024
1 parent 8dc581f commit 59f60ee
Show file tree
Hide file tree
Showing 30 changed files with 349 additions and 218 deletions.
32 changes: 13 additions & 19 deletions docs/examples/api-reference/client.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import unittest
from datetime import datetime
from typing import Optional

import pandas as pd
import requests
from typing import Optional

from fennel.datasets import dataset, field
from fennel.featuresets import feature, featureset, extractor
Expand Down Expand Up @@ -102,25 +102,21 @@ def test_dag_resolution(self, client):
assert response.status_code == requests.codes.OK, response.json()
# /docsnip

# docsnip extract_features_api
feature_df = client.extract_features(
output_feature_list=[
UserFeatures,
],
input_feature_list=[UserFeatures.userid],
# docsnip extract_api
feature_df = client.extract(
outputs=[UserFeatures],
inputs=[UserFeatures.userid],
input_dataframe=pd.DataFrame(
{"UserFeatures.userid": [18232, 18234]}
),
)
self.assertEqual(feature_df.shape, (2, 7))
# /docsnip

# docsnip extract_historical_features_api
response = client.extract_historical_features(
output_feature_list=[
UserFeatures,
],
input_feature_list=[UserFeatures.userid],
# docsnip extract_historical_api
response = client.extract_historical(
outputs=[UserFeatures],
inputs=[UserFeatures.userid],
format="pandas",
input_dataframe=pd.DataFrame(
{"UserFeatures.userid": [18232, 18234], "timestamp": [now, now]}
Expand All @@ -130,7 +126,7 @@ def test_dag_resolution(self, client):
# /docsnip

with self.assertRaises(NotImplementedError) as e:
# docsnip extract_historical_features_s3
# docsnip extract_historical_s3
from fennel.sources import S3

s3 = S3(
Expand All @@ -143,11 +139,9 @@ def test_dag_resolution(self, client):
)
s3_output_connection = s3.bucket("bucket", prefix="output")

response = client.extract_historical_features(
output_feature_list=[
UserFeatures,
],
input_feature_list=[UserFeatures.userid],
response = client.extract_historical(
outputs=[UserFeatures],
inputs=[UserFeatures.userid],
format="csv",
timestamp_column="timestamp",
input_s3=s3_input_connection,
Expand Down
16 changes: 8 additions & 8 deletions docs/examples/api-reference/rest-api.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,17 @@ def test_log(self, mock_post):
assert response.status_code == requests.codes.OK, response.json()
# /docsnip

# docsnip rest_extract_features_api
url = "{}/api/v1/extract_features".format(SERVER)
# docsnip rest_extract_api
url = "{}/api/v1/extract".format(SERVER)
headers = {"Content-Type": "application/json"}
data = [
{"UserFeatures.userid": 1},
{"UserFeatures.userid": 2},
{"UserFeatures.userid": 3},
]
req = {
"output_features": ["UserFeatures"],
"input_features": ["UserFeatures.userid"],
"outputs": ["UserFeatures"],
"inputs": ["UserFeatures.userid"],
"data": data,
"log": True,
"workflow": "test",
Expand All @@ -65,13 +65,13 @@ def test_log(self, mock_post):
assert response.status_code == requests.codes.OK, response.json()
# /docsnip

# docsnip rest_extract_features_api_columnar
url = "{}/api/v1/extract_features".format(SERVER)
# docsnip rest_extract_api_columnar
url = "{}/api/v1/extract".format(SERVER)
headers = {"Content-Type": "application/json"}
data = {"UserFeatures.userid": [1, 2, 3]}
req = {
"output_features": ["UserFeatures"],
"input_features": ["UserFeatures.userid"],
"outputs": ["UserFeatures"],
"inputs": ["UserFeatures.userid"],
"data": data,
"log": True,
"workflow": "test",
Expand Down
6 changes: 3 additions & 3 deletions docs/examples/datasets/lookups.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ def test_user_dataset_lookup(client):

res = client.log("fennel_webhook", "User", df)
assert res.status_code == 200, res.json()
feature_df = client.extract_features(
output_feature_list=[UserFeature.in_home_city],
input_feature_list=[UserFeature.uid],
feature_df = client.extract(
outputs=[UserFeature.in_home_city],
inputs=[UserFeature.uid],
input_dataframe=pd.DataFrame({"UserFeature.uid": [1, 2, 3]}),
)
assert feature_df["UserFeature.in_home_city"].tolist() == [
Expand Down
6 changes: 3 additions & 3 deletions docs/examples/datasets/operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,9 @@ def test_aggregate(client):
three_days_ago = dt - timedelta(days=3)
ts_series = pd.Series([dt, yes, dt, three_days_ago, yes])
uids = pd.Series([1, 1, 2, 2, 2])
df = client.extract_historical_features(
input_feature_list=[UserAdStatsFeatures.uid],
output_feature_list=[UserAdStatsFeatures],
df = client.extract_historical(
inputs=[UserAdStatsFeatures.uid],
outputs=[UserAdStatsFeatures],
input_dataframe=pd.DataFrame(
{"UserAdStatsFeatures.uid": uids, "timestamps": ts_series}
),
Expand Down
6 changes: 3 additions & 3 deletions docs/examples/examples/ecommerce.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,12 @@ def test_feature(self, client):
response = client.log("fennel_webhook", "Order", df)
assert response.status_code == requests.codes.OK, response.json()

feature_df = client.extract_features(
output_feature_list=[
feature_df = client.extract(
outputs=[
"UserSeller.num_orders_1d",
"UserSeller.num_orders_1w",
],
input_feature_list=[
inputs=[
"UserSeller.uid",
"UserSeller.seller_id",
],
Expand Down
6 changes: 3 additions & 3 deletions docs/examples/featuresets/e2e_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,14 @@ class Request:
def test_e2e_extraction(client):
client.sync(featuresets=[User, UserPost, Request])
# docsnip e2e_extraction
feature_df = client.extract_features(
output_feature_list=[
feature_df = client.extract(
outputs=[
"User.age",
"UserPost.score",
"UserPost.affinity"
# there are 10 features in this list
],
input_feature_list=[
inputs=[
"User.id",
"UserPost.uid",
"UserPost.pid",
Expand Down
12 changes: 6 additions & 6 deletions docs/examples/featuresets/overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,9 +220,9 @@ def test_multiple_features_extracted(client):
res = client.log("fennel_webhook", "UserInfo", df)
assert res.status_code == 200

df = client.extract_features(
output_feature_list=[UserLocationFeatures],
input_feature_list=[UserLocationFeatures.uid],
df = client.extract(
outputs=[UserLocationFeatures],
inputs=[UserLocationFeatures.uid],
input_dataframe=pd.DataFrame(
{"UserLocationFeatures.uid": [1, 2, 3]},
),
Expand Down Expand Up @@ -292,9 +292,9 @@ def test_extractors_across_featuresets(client):
res = client.log("fennel_webhook", "UserInfo", df)
assert res.status_code == 200

df = client.extract_features(
output_feature_list=[UserLocationFeaturesRefactored],
input_feature_list=[Request.uid],
df = client.extract(
outputs=[UserLocationFeaturesRefactored],
inputs=[Request.uid],
input_dataframe=pd.DataFrame(
{"Request.uid": [1, 2, 3]},
),
Expand Down
18 changes: 9 additions & 9 deletions docs/examples/featuresets/reading_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,9 @@ def test_lookup_in_extractor(client):
res = client.log("fennel_webhook", "User", data)
assert res.status_code == 200, res.json()

feature_df = client.extract_features(
output_feature_list=[UserFeatures.name],
input_feature_list=[UserFeatures.uid],
feature_df = client.extract(
outputs=[UserFeatures.name],
inputs=[UserFeatures.uid],
input_dataframe=pd.DataFrame(
{
"UserFeatures.uid": [1, 2, 3, 4],
Expand All @@ -105,9 +105,9 @@ def test_lookup_in_extractor(client):
expected = ["Alice", "Bob", "Charlie", "Unknown"]
assert feature_df["UserFeatures.name"].tolist() == expected

feature_df = client.extract_features(
output_feature_list=[UserFeaturesDerived.name],
input_feature_list=[Request.user_id],
feature_df = client.extract(
outputs=[UserFeaturesDerived.name],
inputs=[Request.user_id],
input_dataframe=pd.DataFrame(
{
"Request.user_id": [1, 2, 3, 4],
Expand All @@ -116,9 +116,9 @@ def test_lookup_in_extractor(client):
)
assert feature_df["UserFeaturesDerived.name"].tolist() == expected

feature_df = client.extract_features(
output_feature_list=[UserFeaturesDerived2.name],
input_feature_list=[Request2.uid],
feature_df = client.extract(
outputs=[UserFeaturesDerived2.name],
inputs=[Request2.uid],
input_dataframe=pd.DataFrame(
{
"Request2.uid": [1, 2, 3, 4],
Expand Down
14 changes: 7 additions & 7 deletions docs/examples/getting-started/quickstart.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# docsnip imports
from datetime import datetime, timedelta
from typing import Optional

import pandas as pd
import requests
from typing import Optional

from fennel.datasets import dataset, pipeline, field, Dataset
from fennel.featuresets import feature, featureset, extractor
Expand Down Expand Up @@ -145,12 +145,12 @@ def myextractor(cls, ts: pd.Series, uids: pd.Series, sellers: pd.Series):
# /docsnip

# docsnip query
feature_df = client.extract_features(
output_feature_list=[
feature_df = client.extract(
outputs=[
UserSellerFeatures.num_orders_1d,
UserSellerFeatures.num_orders_1w,
],
input_feature_list=[
inputs=[
UserSellerFeatures.uid,
UserSellerFeatures.seller_id,
],
Expand All @@ -170,12 +170,12 @@ def myextractor(cls, ts: pd.Series, uids: pd.Series, sellers: pd.Series):
# /docsnip

# docsnip historical
feature_df = client.extract_historical_features(
output_feature_list=[
feature_df = client.extract_historical(
outputs=[
UserSellerFeatures.num_orders_1d,
UserSellerFeatures.num_orders_1w,
],
input_feature_list=[
inputs=[
UserSellerFeatures.uid,
UserSellerFeatures.seller_id,
],
Expand Down
10 changes: 6 additions & 4 deletions docs/examples/testing-and-ci-cd/ci_cd/test.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from datetime import datetime

import pandas as pd
from fennel.test_lib import mock
from ci_cd.datasets import Ticket
from ci_cd.featuresets import TicketFeatures

from fennel.test_lib import mock


@mock
def test_featureset_metaflags(client):
Expand All @@ -17,9 +19,9 @@ def test_featureset_metaflags(client):
columns=["ticket_id", "price", "at"],
)
client.log("example", "ticket_sale", df)
feature_df = client.extract_features(
input_feature_list=[TicketFeatures.ticket_id],
output_feature_list=[TicketFeatures.price, TicketFeatures.ticket_id],
feature_df = client.extract(
inputs=[TicketFeatures.ticket_id],
outputs=[TicketFeatures.price, TicketFeatures.ticket_id],
input_dataframe=pd.DataFrame(
data={"TicketFeatures.ticket_id": ["123", "456"]}
),
Expand Down
10 changes: 4 additions & 6 deletions docs/examples/testing-and-ci-cd/unit_tests.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from datetime import datetime, timedelta
from typing import Optional

import pandas as pd
import requests
from typing import Optional

# docsnip datasets
from fennel.datasets import dataset, field, pipeline, Dataset
Expand Down Expand Up @@ -234,11 +234,9 @@ def test_dag_resolution(self, client):
response = client.log("fennel_webhook", "UserInfoDataset", df)
assert response.status_code == requests.codes.OK, response.json()

feature_df = client.extract_features(
output_feature_list=[
UserInfoMultipleExtractor,
],
input_feature_list=[UserInfoMultipleExtractor.userid],
feature_df = client.extract(
outputs=[UserInfoMultipleExtractor],
inputs=[UserInfoMultipleExtractor.userid],
input_dataframe=pd.DataFrame(
{"UserInfoMultipleExtractor.userid": [18232, 18234]}
),
Expand Down
Loading

0 comments on commit 59f60ee

Please sign in to comment.