From 706d47bccc2a96d67ef7ef3bd9e03a991f9b04e6 Mon Sep 17 00:00:00 2001 From: Zaki Siddiqui Date: Wed, 29 Nov 2023 20:18:38 -0800 Subject: [PATCH] Update docs for extract historical request (#313) --- docs/examples/api-reference/client.py | 29 +++++++++++++++++++++++++++ docs/pages/api-reference/client.md | 14 +++++++++---- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/docs/examples/api-reference/client.py b/docs/examples/api-reference/client.py index 19645b520..73ed77c91 100644 --- a/docs/examples/api-reference/client.py +++ b/docs/examples/api-reference/client.py @@ -128,3 +128,32 @@ def test_dag_resolution(self, client): timestamp_column="timestamp", ) # /docsnip + + with self.assertRaises(NotImplementedError) as e: + # docsnip extract_historical_features_s3 + from fennel.sources import S3 + + s3 = S3( + name="extract_hist_input", + aws_access_key_id="", + aws_secret_access_key="", + ) + s3_input_connection = s3.bucket( + "bucket", prefix="data/user_features" + ) + s3_output_connection = s3.bucket("bucket", prefix="output") + + response = client.extract_historical_features( + output_feature_list=[ + UserFeatures, + ], + input_feature_list=[UserFeatures.userid], + format="csv", + timestamp_column="timestamp", + input_s3=s3_input_connection, + output_s3=s3_output_connection, + ) + # /docsnip + assert "Only pandas format is supported in MockClient" in str( + e.exception + ) diff --git a/docs/pages/api-reference/client.md b/docs/pages/api-reference/client.md index d0bf086f1..0acd8ef05 100644 --- a/docs/pages/api-reference/client.md +++ b/docs/pages/api-reference/client.md @@ -88,15 +88,15 @@ This api is an asynchronous api that returns a request id and the path to the ou * `timestamp_column: str` - The name of the column containing the timestamps. * `format: str` - The format of the input data. Can be either "pandas", "csv", "json" or "parquet". Default is "pandas". 
* `input_dataframe: Optional[pd.DataFrame]` - Dataframe containing the input features. Only relevant when format is "pandas". -* `output_bucket: Optional[str]` - The name of the S3 bucket where the output data should be stored. -* `output_prefix: Optional[str]` - The prefix of the S3 key where the output data should be stored. +* `output_s3: Optional[sources.S3Connector]` - Specifies the S3 bucket, prefix, and optional credentials for where the output data should be stored. The following parameters are only relevant when format is "csv", "json" or "parquet". -* `input_bucket: Optional[str]` - The name of the S3 bucket containing the input data. -* `input_prefix: Optional[str]` - The prefix of the S3 key containing the input data. +* `input_s3: Optional[sources.S3Connector]` - Specifies the S3 bucket, prefix, and optional credentials for the input data. * ` feature_to_column_map (Optional[Dict[Feature, str]])`: A dictionary mapping features to column names. +The `S3Connector` parameters are provided via the `S3.bucket()` function from the `sources` module. See [Sources](/api-reference/sources#s3). + **Returns:** * `Dict[str, Any]` - A dictionary containing the following information: @@ -113,8 +113,14 @@ A completion rate of 1.0 and a failure rate of 0.0 indicates that all processing **Example** +Here is an example with `format="pandas"` and the default output bucket: +

 
+Here is an example specifying input and output S3 buckets:
+
+

+
 ****
 
 ### **extract_historical_features_progress**