query_offline: Remove format parameter from query_offline
aditya-nambiar authored and nikhilgarg28 committed Oct 9, 2024
1 parent 27e1a53 commit 562da06
Showing 7 changed files with 16 additions and 41 deletions.
10 changes: 5 additions & 5 deletions docs/examples/api-reference/client.py
@@ -11,6 +11,7 @@
from fennel.lib import includes, meta, inputs, outputs
from fennel.testing import mock
from fennel.expr.expr import col, lit, when
from fennel.client import Client

webhook = Webhook(name="fennel_webhook")

@@ -120,7 +121,6 @@ def test_dag_resolution(self, client):
response = client.query_offline(
outputs=[UserFeatures],
inputs=[UserFeatures.userid],
format="pandas",
input_dataframe=pd.DataFrame(
{"UserFeatures.userid": [18232, 18234], "timestamp": [now, now]}
),
@@ -136,7 +136,7 @@ def test_dag_resolution(self, client):
)
# /docsnip

with self.assertRaises(NotImplementedError) as e:
with self.assertRaises(ValueError) as e:
# docsnip query_offline_s3
from fennel.connectors import S3

@@ -153,12 +153,12 @@ def test_dag_resolution(self, client):
response = client.query_offline(
outputs=[UserFeatures],
inputs=[UserFeatures.userid],
format="csv",
timestamp_column="timestamp",
input_s3=s3_input_connection,
output_s3=s3_output_connection,
)
# /docsnip
assert "Only pandas format is supported in MockClient" in str(
e.exception
assert (
"input must contain a key 'input_dataframe' with the input dataframe"
in str(e.exception)
)
1 change: 0 additions & 1 deletion docs/examples/getting-started/quickstart.py
@@ -173,7 +173,6 @@ def myextractor(cls, ts: pd.Series, uids: pd.Series, sellers: pd.Series):
UserSellerFeatures.seller_id,
],
timestamp_column="timestamps",
format="pandas",
input_dataframe=pd.DataFrame(
[[1, 1, now], [1, 2, now], [1, 1, now - day], [1, 2, now - day]],
columns=[
12 changes: 3 additions & 9 deletions docs/pages/api-reference/client/query-offline.md
@@ -21,16 +21,12 @@
either as Feature objects, or Featureset objects (in which case all features under
that featureset are queried) or strings representing fully qualified feature names.
</Expandable>

<Expandable title="format" type='"pandas" | "csv" | "json" | "parquet"' defaultVal="pandas">
The format of the input data
</Expandable>

<Expandable title="input_dataframe" type="pd.Dataframe">
<Expandable title="input_dataframe" type="Optional[pd.Dataframe]">
A pandas dataframe object that contains the values of all features in the inputs
list. Each row of the dataframe can be thought of as one entity for which
features need to be queried.

Only relevant when `format` is "pandas".
This parameter is mutually exclusive with `input_s3`.
</Expandable>

<Expandable title="input_s3" type="Optional[connectors.S3]">
@@ -39,8 +35,6 @@
In such cases, input data can be written to S3 and the location of the file is
sent as `input_s3` via `S3.bucket()` function of [S3](/api-reference/connectors/s3)
connector.

This parameter makes sense only when `format` isn't "pandas".

When using this option, please ensure that Fennel's data connector
IAM role has the ability to execute read & list operations on this bucket -
talk to Fennel support if you need help.
@@ -115,7 +109,7 @@ in order to resolve the path from the input features to the output features.

===
<pre name="Request" snippet="api-reference/client/query#extract_historical_api"
status="success" message="Example with `format='pandas'` & default s3 output"
status="success" message="Example with pandas input & default s3 output"
></pre>
<pre name="Response" snippet="api-reference/client/query#extract_historical_response"
status="success" message="Response of extract historical"
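With the `format` argument gone, the pandas path is chosen simply by passing `input_dataframe`. A minimal sketch of the updated call, assuming the `UserFeatures` featureset from the snippets above and placeholder connection details for the `Client` constructor:

import pandas as pd
from fennel.client import Client

# Placeholder URL/token -- substitute the real Fennel deployment details.
client = Client(url="https://your-instance.fennel.ai", token="YOUR_TOKEN")

now = pd.Timestamp.now()
response = client.query_offline(
    outputs=[UserFeatures],           # featureset assumed from the docs examples
    inputs=[UserFeatures.userid],
    timestamp_column="timestamp",
    # No format= argument any more: supplying input_dataframe selects the pandas path.
    input_dataframe=pd.DataFrame(
        {"UserFeatures.userid": [18232, 18234], "timestamp": [now, now]}
    ),
)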
4 changes: 4 additions & 0 deletions fennel/CHANGELOG.md
@@ -1,4 +1,8 @@
# Changelog

## [1.5.36] - 2024-10-09
- Remove format parameter from query_offline.

## [1.5.35] - 2024-10-08
- Enable discrete window aggregation.

23 changes: 3 additions & 20 deletions fennel/client/client.py
@@ -457,7 +457,6 @@ def query_offline(
inputs: List[Union[Feature, str]],
outputs: List[Union[Feature, Featureset, str]],
timestamp_column: str,
format: str = "pandas",
input_dataframe: Optional[pd.DataFrame] = None,
input_s3: Optional[S3Connector] = None,
output_s3: Optional[S3Connector] = None,
@@ -481,18 +480,14 @@
timestamp_column (str): The name of the column containing timestamps.
format (str): The format of the input data. Can be either "pandas",
"csv", "json" or "parquet". Default is "pandas".
input_dataframe (Optional[pd.DataFrame]): Dataframe containing the input
features. Only relevant when format is "pandas".
features.
output_s3 (Optional[S3Connector]): Contains the S3 info -- bucket,
prefix, and optional access key id and secret key -- used for
storing the output of the extract historical request
The following parameters are only relevant when format is "csv", "json"
or "parquet".
The following parameters are only relevant when the input data is provided via S3
input_s3 (Optional[connectors.S3Connector]): The info for the input S3
data, containing bucket, prefix, and optional access key id and
@@ -509,13 +504,6 @@
indicates that all processing has been completed. A failure rate of 0.0
indicates that all processing has been completed successfully.
"""
if format not in ["pandas", "csv", "json", "parquet"]:
raise Exception(
"Invalid input format. "
"Please provide one of the following formats: "
"'pandas', 'csv', 'json' or 'parquet'."
)

input_feature_names = []
for input_feature in inputs:
if isinstance(input_feature, Feature):
@@ -535,12 +523,7 @@

input_info: Dict[str, Any] = {}
extract_historical_input = {}
if format == "pandas":
if input_dataframe is None:
raise Exception(
"Input dataframe not found in input dictionary. "
"Please provide a dataframe as value for the key 'input_dataframe'."
)
if input_dataframe is not None:
if not isinstance(input_dataframe, pd.DataFrame):
raise Exception(
"Input dataframe is not of type pandas.DataFrame, "
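With this validation in place, large queries take the S3 path simply by supplying `input_s3`/`output_s3` instead of `input_dataframe`. A sketch mirroring the S3 example in the test diff above, assuming placeholder bucket and prefix names and a hypothetical `S3(name=...)` / `bucket(...)` setup, with the same `client` and `UserFeatures` as before:

from fennel.connectors import S3

# Placeholder connector, bucket, and prefixes -- adjust to your environment.
s3 = S3(name="offline_query_s3")
s3_input_connection = s3.bucket(bucket_name="your-bucket", prefix="query/input")
s3_output_connection = s3.bucket(bucket_name="your-bucket", prefix="query/output")

response = client.query_offline(
    outputs=[UserFeatures],
    inputs=[UserFeatures.userid],
    timestamp_column="timestamp",
    input_s3=s3_input_connection,    # mutually exclusive with input_dataframe
    output_s3=s3_output_connection,  # where the query results are written
)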
5 changes: 0 additions & 5 deletions fennel/testing/mock_client.py
@@ -293,16 +293,11 @@ def query_offline(
inputs: List[Union[Feature, str]],
outputs: List[Union[Feature, Featureset, str]],
timestamp_column: str,
format: str = "pandas",
input_dataframe: Optional[pd.DataFrame] = None,
input_s3: Optional[S3Connector] = None,
output_s3: Optional[S3Connector] = None,
feature_to_column_map: Optional[Dict[Feature, str]] = None,
) -> Union[pd.DataFrame, pd.Series]:
if format != "pandas":
raise NotImplementedError(
"Only pandas format is supported in MockClient"
)
if input_dataframe is None:
raise ValueError(
"input must contain a key 'input_dataframe' with the input dataframe"
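In a `@mock`-decorated test, omitting `input_dataframe` now fails with this `ValueError` rather than the old format-related `NotImplementedError`. A minimal sketch of the new expectation, written in pytest style for brevity (the repo's own test uses `self.assertRaises`):

import pytest

# `client` is assumed to be the mocked client injected by fennel.testing's @mock decorator.
with pytest.raises(ValueError) as e:
    client.query_offline(
        outputs=[UserFeatures],
        inputs=[UserFeatures.userid],
        timestamp_column="timestamp",
        # Neither input_dataframe nor input_s3 is supplied, so the mock client rejects the call.
    )
assert "input must contain a key 'input_dataframe'" in str(e.value)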
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "fennel-ai"
version = "1.5.35"
version = "1.5.36"
description = "The modern realtime feature engineering platform"
authors = ["Fennel AI <[email protected]>"]
packages = [{ include = "fennel" }]
