Update docs for extract historical request (#313)
zsid60 authored Nov 30, 2023
1 parent 627acc2 commit 706d47b
Showing 2 changed files with 39 additions and 4 deletions.
29 changes: 29 additions & 0 deletions docs/examples/api-reference/client.py
@@ -128,3 +128,32 @@ def test_dag_resolution(self, client):
            timestamp_column="timestamp",
        )
        # /docsnip

        with self.assertRaises(NotImplementedError) as e:
            # docsnip extract_historical_features_s3
            from fennel.sources import S3

            s3 = S3(
                name="extract_hist_input",
                aws_access_key_id="<ACCESS KEY HERE>",
                aws_secret_access_key="<SECRET KEY HERE>",
            )
            s3_input_connection = s3.bucket(
                "bucket", prefix="data/user_features"
            )
            s3_output_connection = s3.bucket("bucket", prefix="output")

            response = client.extract_historical_features(
                output_feature_list=[
                    UserFeatures,
                ],
                input_feature_list=[UserFeatures.userid],
                format="csv",
                timestamp_column="timestamp",
                input_s3=s3_input_connection,
                output_s3=s3_output_connection,
            )
            # /docsnip
        assert "Only pandas format is supported in MockClient" in str(
            e.exception
        )
14 changes: 10 additions & 4 deletions docs/pages/api-reference/client.md
@@ -88,15 +88,15 @@ This API is an asynchronous API that returns a request id and the path to the output
* `timestamp_column: str` - The name of the column containing the timestamps.
* `format: str` - The format of the input data. Can be one of "pandas", "csv", "json", or "parquet". Default is "pandas".
* `input_dataframe: Optional[pd.DataFrame]` - Dataframe containing the input features. Only relevant when format is "pandas".
* `output_bucket: Optional[str]` - The name of the S3 bucket where the output data should be stored.
* `output_prefix: Optional[str]` - The prefix of the S3 key where the output data should be stored.
* `output_s3: Optional[sources.S3Connector]` - Specifies the S3 bucket, prefix, and optional credentials for the location where the output data should be stored.

The following parameters are only relevant when format is "csv", "json" or "parquet".

* `input_bucket: Optional[str]` - The name of the S3 bucket containing the input data.
* `input_prefix: Optional[str]` - The prefix of the S3 key containing the input data.
* `input_s3: Optional[sources.S3Connector]` - Specifies the S3 bucket, prefix, and optional credentials for the input data.
* `feature_to_column_map: Optional[Dict[Feature, str]]` - A dictionary mapping features to column names.

The `S3Connector` parameters are provided via the `S3.bucket()` function from the `sources` module. See [Sources](/api-reference/sources#s3).
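As an illustration of what `feature_to_column_map` expresses, here is a minimal pure-Python sketch of applying such a mapping to a CSV header row. This is not Fennel's implementation, and the feature and column names below are hypothetical.

```python
# Minimal sketch (not Fennel's implementation): applying a
# feature -> CSV-column mapping to a header row, so that mapped
# columns are read under their feature names. All names are
# hypothetical placeholders.

def rename_columns(header, feature_to_column_map):
    """Replace mapped CSV column names with their feature names."""
    # Invert the map so we can look up each column's feature name.
    column_to_feature = {
        col: feat for feat, col in feature_to_column_map.items()
    }
    return [column_to_feature.get(col, col) for col in header]

header = ["uid", "timestamp", "plan"]
mapping = {"UserFeatures.userid": "uid"}  # feature -> column name
print(rename_columns(header, mapping))
# ['UserFeatures.userid', 'timestamp', 'plan']
```

Columns not present in the map pass through unchanged, matching the idea that only features whose names differ from their CSV columns need an entry.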

**Returns:**

* `Dict[str, Any]` - A dictionary containing the following information:
@@ -113,8 +113,14 @@ A completion rate of 1.0 and a failure rate of 0.0 indicate that all processing is complete
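To make the rate semantics concrete, here is an illustrative arithmetic sketch; the field names are assumptions for illustration, not the client's actual response schema.

```python
# Illustrative only (field names are assumptions, not the client's
# actual response schema): completion and failure rates as fractions
# of total tasks, so 1.0 / 0.0 means everything finished successfully.

def rates(total: int, completed: int, failed: int) -> dict:
    """Return completion/failure rates as fractions of total tasks."""
    if total == 0:
        return {"completion_rate": 0.0, "failure_rate": 0.0}
    return {
        "completion_rate": completed / total,
        "failure_rate": failed / total,
    }

print(rates(total=200, completed=200, failed=0))
# {'completion_rate': 1.0, 'failure_rate': 0.0}
```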

**Example**

Here is an example with `format="pandas"` and the default output bucket:

<pre snippet="api-reference/client#extract_historical_features_api"></pre>

Here is an example specifying input and output S3 buckets:

<pre snippet="api-reference/client#extract_historical_features_s3"></pre>

****

### **extract_historical_features_progress**
