diff --git a/docs/examples/api-reference/operators_ref.py b/docs/examples/api-reference/operators_ref.py index 40ae7af9e..d5e2b4ab3 100644 --- a/docs/examples/api-reference/operators_ref.py +++ b/docs/examples/api-reference/operators_ref.py @@ -107,7 +107,7 @@ class UserFirstAction: def create_user_first_action_category(cls, txns: UserTransactions): # docsnip first first_txns = txns.groupby("user_id").first() - return first_txns.drop(["merchant_id"]) + return first_txns.drop("merchant_id") # /docsnip diff --git a/fennel/datasets/datasets.py b/fennel/datasets/datasets.py index 37d355182..bfd4e668b 100644 --- a/fennel/datasets/datasets.py +++ b/fennel/datasets/datasets.py @@ -767,30 +767,33 @@ def _create_lookup_function( :param struct: Map from column names to Struct Classes. We use this to convert any dictionaries back to structs post lookup. """ - if len(key_fields) == 0: - return None def lookup( ts: pd.Series, *args, **kwargs ) -> Tuple[pd.DataFrame, pd.Series]: + if len(key_fields) == 0: + raise Exception( + f"Trying to lookup dataset `{cls_name}` with no keys defined.\n" + f"Please define one or more keys using field(key=True) to perform a lookup." 
+ ) if len(args) > 0: raise ValueError( - f"lookup expects key value arguments and can " + f"Lookup for dataset `{cls_name}` expects key value arguments and can " f"optionally include fields, found {args}" ) if len(kwargs) < len(key_fields): raise ValueError( - f"lookup expects keys of the table being looked up and can " + f"Lookup for dataset `{cls_name}` expects keys of the table being looked up and can " f"optionally include fields, found {kwargs}" ) # Check that ts is a series of datetime64[ns] if not isinstance(ts, pd.Series): raise ValueError( - f"lookup expects a series of timestamps, found {type(ts)}" + f"Lookup for dataset `{cls_name}` expects a series of timestamps, found {type(ts)}" ) if not np.issubdtype(ts.dtype, np.datetime64): raise ValueError( - f"lookup expects a series of timestamps, found {ts.dtype}" + f"Lookup for dataset `{cls_name}` expects a series of timestamps, found {ts.dtype}" ) # extract keys and fields from kwargs arr = [] diff --git a/fennel/featuresets/featureset.py b/fennel/featuresets/featureset.py index c3fb4dddd..d551b77b5 100644 --- a/fennel/featuresets/featureset.py +++ b/fennel/featuresets/featureset.py @@ -480,6 +480,31 @@ def __init__( self._expectation = self._get_expectations() propogate_fennel_attributes(featureset_cls, self) + def get_dataset_dependencies(self): + """ + This function gets the list of datasets the Featureset depends upon. + This dependency is introduced by features that directly lookup a dataset + via the DS-FS route. + + The motivation for this function is to help generate the required code, even + if an extractor does not depend on a dataset, but is part of a featureset which + has these kinds of dependencies. 
+ """ + depended_datasets = [] + for f in self._features: + if ( + f.extractor is not None + and f.extractor.derived_extractor_info is not None + ): + assert ( + f.extractor.derived_extractor_info.field.dataset is not None + ) + depended_datasets.append( + f.extractor.derived_extractor_info.field.dataset + ) + + return depended_datasets + # ------------------- Private Methods ---------------------------------- def _add_feature_names_as_attributes(self): diff --git a/fennel/lib/to_proto/test_to_proto.py b/fennel/lib/to_proto/test_to_proto.py index f5bfc9df7..c30acffdc 100644 --- a/fennel/lib/to_proto/test_to_proto.py +++ b/fennel/lib/to_proto/test_to_proto.py @@ -118,7 +118,9 @@ def test_includes(): } TestFeatureset.extractors[0] includes_proto = to_extractor_pycode( - TestFeatureset.extractors[0], TestFeatureset, {} + TestFeatureset.extractors[0], + TestFeatureset, + {"TestFeatureset": TestFeatureset}, ) expected_extractor = rm_imports(ParseDict(f, pycode_proto.PyCode())) includes_proto = rm_imports(includes_proto) diff --git a/fennel/lib/to_proto/to_proto.py b/fennel/lib/to_proto/to_proto.py index b3c62b8e9..69da639f0 100644 --- a/fennel/lib/to_proto/to_proto.py +++ b/fennel/lib/to_proto/to_proto.py @@ -1196,8 +1196,14 @@ def to_extractor_pycode( gen_code = "\n" + dedent(dep.generated_code) + "\n" + gen_code dependencies.append(dep) + datasets_added = set() # Extractor code construction for dataset in extractor.get_dataset_dependencies(): + datasets_added.add(dataset) + for dataset in fs_obj_map[extractor.featureset].get_dataset_dependencies(): + datasets_added.add(dataset) + + for dataset in datasets_added: gen_code += get_dataset_core_code(dataset) input_fs_added = set() diff --git a/fennel/test_lib/integration_client.py b/fennel/test_lib/integration_client.py index 22dd2bfe6..90820d4d2 100644 --- a/fennel/test_lib/integration_client.py +++ b/fennel/test_lib/integration_client.py @@ -11,8 +11,8 @@ import pyarrow as pa from fennel_client_lib import RustClient # 
type: ignore from fennel_dataset import lookup # type: ignore -except ImportError as e: - print(f"exception during import {e}") +except ImportError: + pass from fennel._vendor.requests import Response # type: ignore diff --git a/pyproject.toml b/pyproject.toml index 2757bd692..8a8754ad7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "fennel-ai" -version = "0.18.12" +version = "0.18.13" description = "The modern realtime feature engineering platform" authors = ["Fennel AI "] packages = [{ include = "fennel" }]