Skip to content

Commit

Permalink
extract: Handle codegen for extractors that don't depend on datasets (#…
Browse files Browse the repository at this point in the history
…297)

extract: Handle codegen for extractors that do not depend on datasets
  • Loading branch information
aditya-nambiar authored Nov 10, 2023
1 parent 126be47 commit 7b5061e
Show file tree
Hide file tree
Showing 7 changed files with 47 additions and 11 deletions.
2 changes: 1 addition & 1 deletion docs/examples/api-reference/operators_ref.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ class UserFirstAction:
def create_user_first_action_category(cls, txns: UserTransactions):
# docsnip first
first_txns = txns.groupby("user_id").first()
return first_txns.drop(["merchant_id"])
return first_txns.drop("merchant_id")
# /docsnip


Expand Down
15 changes: 9 additions & 6 deletions fennel/datasets/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -767,30 +767,33 @@ def _create_lookup_function(
:param struct: Map from column names to Struct Classes. We use this to
convert any dictionaries back to structs post lookup.
"""
if len(key_fields) == 0:
return None

def lookup(
ts: pd.Series, *args, **kwargs
) -> Tuple[pd.DataFrame, pd.Series]:
if len(key_fields) == 0:
raise Exception(
f"Trying to lookup dataset `{cls_name} with no keys defined.\n"
f"Please define one or more keys using field(key=True) to perform a lookup."
)
if len(args) > 0:
raise ValueError(
f"lookup expects key value arguments and can "
f"Lookup for dataset `{cls_name}` expects key value arguments and can "
f"optionally include fields, found {args}"
)
if len(kwargs) < len(key_fields):
raise ValueError(
f"lookup expects keys of the table being looked up and can "
f"Lookup for dataset `{cls_name}` expects keys of the table being looked up and can "
f"optionally include fields, found {kwargs}"
)
# Check that ts is a series of datetime64[ns]
if not isinstance(ts, pd.Series):
raise ValueError(
f"lookup expects a series of timestamps, found {type(ts)}"
f"Lookup for dataset `{cls_name}` expects a series of timestamps, found {type(ts)}"
)
if not np.issubdtype(ts.dtype, np.datetime64):
raise ValueError(
f"lookup expects a series of timestamps, found {ts.dtype}"
f"Lookup for dataset `{cls_name}` expects a series of timestamps, found {ts.dtype}"
)
# extract keys and fields from kwargs
arr = []
Expand Down
25 changes: 25 additions & 0 deletions fennel/featuresets/featureset.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,31 @@ def __init__(
self._expectation = self._get_expectations()
propogate_fennel_attributes(featureset_cls, self)

def get_dataset_dependencies(self):
    """
    Return the list of datasets this Featureset depends upon.

    The dependency is introduced by features whose extractor is
    auto-derived from a dataset field (the DS-FS lookup route).
    Collecting these at the featureset level helps generate the
    required code even when an individual extractor does not depend
    on any dataset itself, but belongs to a featureset that has such
    derived extractors.
    """
    datasets = []
    for feature in self._features:
        extractor = feature.extractor
        if extractor is None or extractor.derived_extractor_info is None:
            continue
        dataset = extractor.derived_extractor_info.field.dataset
        # A derived extractor is always backed by a dataset field.
        assert dataset is not None
        datasets.append(dataset)
    return datasets

# ------------------- Private Methods ----------------------------------

def _add_feature_names_as_attributes(self):
Expand Down
4 changes: 3 additions & 1 deletion fennel/lib/to_proto/test_to_proto.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,9 @@ def test_includes():
}
TestFeatureset.extractors[0]
includes_proto = to_extractor_pycode(
TestFeatureset.extractors[0], TestFeatureset, {}
TestFeatureset.extractors[0],
TestFeatureset,
{"TestFeatureset": TestFeatureset},
)
expected_extractor = rm_imports(ParseDict(f, pycode_proto.PyCode()))
includes_proto = rm_imports(includes_proto)
Expand Down
6 changes: 6 additions & 0 deletions fennel/lib/to_proto/to_proto.py
Original file line number Diff line number Diff line change
Expand Up @@ -1196,8 +1196,14 @@ def to_extractor_pycode(
gen_code = "\n" + dedent(dep.generated_code) + "\n" + gen_code
dependencies.append(dep)

datasets_added = set()
# Extractor code construction
for dataset in extractor.get_dataset_dependencies():
datasets_added.add(dataset)
for dataset in fs_obj_map[extractor.featureset].get_dataset_dependencies():
datasets_added.add(dataset)

for dataset in datasets_added:
gen_code += get_dataset_core_code(dataset)

input_fs_added = set()
Expand Down
4 changes: 2 additions & 2 deletions fennel/test_lib/integration_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
import pyarrow as pa
from fennel_client_lib import RustClient # type: ignore
from fennel_dataset import lookup # type: ignore
except ImportError as e:
print(f"exception during import {e}")
except ImportError:
pass

from fennel._vendor.requests import Response # type: ignore

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "fennel-ai"
version = "0.18.12"
version = "0.18.13"
description = "The modern realtime feature engineering platform"
authors = ["Fennel AI <[email protected]>"]
packages = [{ include = "fennel" }]
Expand Down

0 comments on commit 7b5061e

Please sign in to comment.