Skip to content

Commit

Permalink
Better error messages (#304)
Browse files Browse the repository at this point in the history
errors: Better errors
  • Loading branch information
aditya-nambiar authored Nov 18, 2023
1 parent e4ed49e commit a774fd6
Show file tree
Hide file tree
Showing 10 changed files with 40 additions and 33 deletions.
2 changes: 1 addition & 1 deletion docs/examples/featuresets/overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def e2(cls, ts: pd.Series, durations: pd.Series) -> pd.Series:
view._get_sync_request_proto()
assert (
str(e.value)
== "Feature `over_3hrs` is extracted by multiple extractors including `e2`."
== "Feature `over_3hrs` is extracted by multiple extractors including `e2` in featureset `Movies`."
)


Expand Down
23 changes: 10 additions & 13 deletions fennel/datasets/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,10 @@ def join(
within: Tuple[Duration, Duration] = ("forever", "0s"),
) -> Join:
if not isinstance(other, Dataset) and isinstance(other, _Node):
raise ValueError("Cannot join with an intermediate dataset")
raise ValueError(
"Cannot join with an intermediate dataset, i.e something defined inside a pipeline."
" Only joining against keyed datasets is permitted."
)
if not isinstance(other, _Node):
raise TypeError("Cannot join with a non-dataset object")
return Join(self, other, within, how, on, left_on, right_on)
Expand Down Expand Up @@ -622,18 +625,14 @@ def make_types_optional(types: Dict[str, Type]) -> Dict[str, Type]:
for col in common_cols:
if self.lsuffix != "" and (col + self.lsuffix) in left_schema:
raise ValueError(
"Column name collision. `{}` already exists in schema of left input {}".format(
col + self.lsuffix, left_dsschema.name
)
f"Column name collision. `{col + self.lsuffix}` already exists in schema of left input {left_dsschema.name}, while joining with {self.dataset.dsschema().name}"
)
if (
self.rsuffix != ""
and (col + self.rsuffix) in right_value_schema
):
raise ValueError(
"Column name collision. `{}` already exists in schema of right input {}".format(
col + self.rsuffix, self.dataset.dsschema().name
)
f"Column name collision. `{col + self.rsuffix}` already exists in schema of right input {self.dataset.dsschema().name}, while joining with {self.dataset.dsschema().name}"
)
left_dsschema.rename_column(col, col + self.lsuffix)
left_schema[col + self.lsuffix] = left_schema.pop(col)
Expand All @@ -648,9 +647,7 @@ def make_types_optional(types: Dict[str, Type]) -> Dict[str, Type]:
for col, dtype in right_value_schema.items():
if col in left_schema:
raise ValueError(
"Column name collision. `{}` already exists in schema of left input {}".format(
col, left_dsschema.name
)
f"Column name collision. `{col}` already exists in schema of left input {left_dsschema.name}, while joining with {self.dataset.dsschema().name}"
)
joined_dsschema.append_value_column(col, dtype)

Expand Down Expand Up @@ -787,7 +784,7 @@ def lookup(
) -> Tuple[pd.DataFrame, pd.Series]:
if len(key_fields) == 0:
raise Exception(
f"Trying to lookup dataset `{cls_name} with no keys defined.\n"
f"Trying to lookup dataset `{cls_name}` with no keys defined.\n"
f"Please define one or more keys using field(key=True) to perform a lookup."
)
if len(args) > 0:
Expand Down Expand Up @@ -1273,7 +1270,7 @@ def _set_timestamp_field(self):
if timestamp_field_set:
raise ValueError(
f"Multiple timestamp fields are not supported in "
f"dataset `{self._name}`."
f"dataset `{self._name}`. Please set one of the datetime fields to be the timestamp field."
)
timestamp_field_set = True

Expand All @@ -1292,7 +1289,7 @@ def _set_timestamp_field(self):
else:
raise ValueError(
f"Multiple timestamp fields are not "
f"supported in dataset `{self._name}`."
f"supported in dataset `{self._name}`. Please set one of the datetime fields to be the timestamp field."
)
if not timestamp_field_set:
raise ValueError(
Expand Down
7 changes: 5 additions & 2 deletions fennel/datasets/test_invalid_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class UserInfoDataset:
_ = InternalTestClient()
assert (
str(e.value) == "Multiple timestamp fields are not supported in "
"dataset `UserInfoDataset`."
"dataset `UserInfoDataset`. Please set one of the datetime fields to be the timestamp field."
)


Expand Down Expand Up @@ -647,7 +647,10 @@ def create_pipeline(cls, a: Dataset):
b = a.transform(lambda x: x)
return a.join(b, how="left", on=["user_id"]) # type: ignore

assert str(e.value) == "Cannot join with an intermediate dataset"
assert (
str(e.value)
== "Cannot join with an intermediate dataset, i.e something defined inside a pipeline. Only joining against keyed datasets is permitted."
)


def test_dataset_incorrect_join_bounds():
Expand Down
13 changes: 8 additions & 5 deletions fennel/featuresets/featureset.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,14 @@ def _create_extractor(extractor_func: Callable, version: int):
elif hasattr(inp, "__name__"):
name = inp.__name__
else:
name = str(inp)
if hasattr(inp, "fqn"):
name = inp.fqn()
else:
name = str(inp)
raise TypeError(
f"Parameter `{name}` is not a feature of but a "
f"`{type(inp)}`. Please note "
f"that Featuresets are mutable and hence not supported."
f"Parameter `{name}` is not a feature, but a "
f"`{type(inp)}`, and hence not supported as an input for the extractor "
f"`{extractor_name}`"
)
params.append(inp)
if hasattr(extractor_func, FENNEL_OUTPUTS):
Expand Down Expand Up @@ -748,6 +751,6 @@ def sync_validation_for_extractors(extractors: List[Extractor]):
if feature in extracted_features:
raise TypeError(
f"Feature `{feature}` is "
f"extracted by multiple extractors including `{extractor.name}`."
f"extracted by multiple extractors including `{extractor.name}` in featureset `{extractor.featureset}`."
)
extracted_features.add(feature)
2 changes: 1 addition & 1 deletion fennel/featuresets/test_featureset.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,7 @@ def get_user_income(cls, ts: pd.Series, user_id: pd.Series):
view._get_sync_request_proto()
assert (
str(e.value)
== "Feature `income` is extracted by multiple extractors including `get_user_income`."
== "Feature `income` is extracted by multiple extractors including `get_user_income` in featureset `UserInfo`."
)

sync_request = view._get_sync_request_proto("prod")
Expand Down
2 changes: 1 addition & 1 deletion fennel/featuresets/test_invalid_derived_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def get_age(cls, ts: pd.Series, user_id: pd.Series):
view._get_sync_request_proto()
assert (
str(e.value)
== "Feature `age` is extracted by multiple extractors including `get_age`."
== "Feature `age` is extracted by multiple extractors including `get_age` in featureset `UserInfo3`."
)


Expand Down
4 changes: 2 additions & 2 deletions fennel/featuresets/test_invalid_featureset.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def get_user_info1(cls, ts: pd.Series, user: pd.Series):

assert (
str(e.value)
== "Parameter `User` is not a feature of but a `<class 'fennel.featuresets.featureset.Featureset'>`. Please note that Featuresets are mutable and hence not supported."
== "Parameter `User` is not a feature, but a `<class 'fennel.featuresets.featureset.Featureset'>`, and hence not supported as an input for the extractor `get_user_info1`"
)


Expand Down Expand Up @@ -90,7 +90,7 @@ def get_user_info3(cls, ts: pd.Series, user_id: pd.Series):
view._get_sync_request_proto()
assert (
str(e.value)
== "Feature `gender` is extracted by multiple extractors including `get_user_info3`."
== "Feature `gender` is extracted by multiple extractors including `get_user_info3` in featureset `UserInfo`."
)


Expand Down
16 changes: 10 additions & 6 deletions fennel/lib/to_proto/to_proto.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,9 @@ def fields_to_dsschema(fields: List[Field]) -> schema_proto.DSSchema:
keys.append(_field_to_proto(field))
elif field.timestamp:
if ts is not None:
raise ValueError("Multiple timestamp fields are not supported")
raise ValueError(
"Multiple timestamp fields are not supported. Please set one of the datetime fields to be the timestamp field."
)
ts = field.name
else:
values.append(_field_to_proto(field))
Expand Down Expand Up @@ -1199,6 +1201,7 @@ def to_extractor_pycode(
f"extractor {extractor.name} has type PyFunc but no function defined"
)
dependencies = []
extractor_fqn = f"{featureset._name}.{extractor.name}"
gen_code = ""
if hasattr(extractor.func, FENNEL_INCLUDED_MOD):
for f in getattr(extractor.func, FENNEL_INCLUDED_MOD):
Expand All @@ -1214,8 +1217,8 @@ def to_extractor_pycode(
for input in extractor.inputs:
if not isinstance(input, Feature):
raise ValueError(
f"Extractor {extractor.name} must have inputs "
f"of type Feature, but got {type(input)}"
f"Extractor `{extractor_fqn}` must have inputs "
f"of type Feature, but got `{type(input)}`"
)
# Dont add the featureset of the extractor itself
if input.featureset_name == featureset._name:
Expand All @@ -1224,9 +1227,10 @@ def to_extractor_pycode(
input_fs_added.add(input.featureset_name)
if input.featureset_name not in fs_obj_map:
raise ValueError(
f"Extractor {extractor.name} has an input "
f"feature {input.name} from featureset "
f"{input.featureset_name} which is not synced"
f"Extractor `{extractor_fqn}` has an input "
f"feature `{input.name}` from featureset "
f"`{input.featureset_name}` which is not synced."
f"Please add the featureset to the sync call."
)
gen_code = (
gen_code
Expand Down
2 changes: 1 addition & 1 deletion fennel/test_lib/mock_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -867,7 +867,7 @@ def _check_schema_exceptions(
if len(exceptions) > 0:
raise Exception(
f"Extractor `{extractor_name}` returned "
f"invalid schema: {exceptions}"
f"invalid schema for data: {exceptions}"
)

def _compute_lookup_extractor(
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "fennel-ai"
version = "0.18.19"
version = "0.18.20"
description = "The modern realtime feature engineering platform"
authors = ["Fennel AI <[email protected]>"]
packages = [{ include = "fennel" }]
Expand Down

0 comments on commit a774fd6

Please sign in to comment.