errors: Make it easier to spot the issue in an error (#303)
aditya-nambiar authored Nov 16, 2023
1 parent 100eeab commit e4ed49e
Showing 10 changed files with 39 additions and 31 deletions.
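The pattern throughout is the same: identifiers interpolated into error messages are wrapped in backticks so the offending value stands out, even when it is empty or contains spaces. A minimal sketch of the convention (the whitespace-only `dataset_name` below is an illustrative worst case, not a value from the diff):

# Sketch of the backtick convention this commit applies everywhere.
dataset_name = "   "  # illustrative worst case: a whitespace-only name

# Before: the identifier dissolves into the sentence.
print(f"Dataset {dataset_name} not found in sync call")
# -> Dataset     not found in sync call

# After: backticks delimit the identifier, so the blank name is obvious.
print(f"Dataset `{dataset_name}` not found in sync call")
# -> Dataset `   ` not found in sync call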
6 changes: 3 additions & 3 deletions fennel/client/client.py
@@ -492,12 +492,12 @@ def extract_historical_features(
         ):
             raise Exception(
                 f"Input dataframe does not contain all the required features. "
-                f"Required features: {input_feature_names}. "
-                f"Input dataframe columns: {input_dataframe.columns}"
+                f"Required features: `{input_feature_names}`. "
+                f"Input dataframe columns: `{input_dataframe.columns}`"
             )
         if timestamp_column not in input_dataframe.columns:
             raise Exception(
-                f"Timestamp column {timestamp_column} not found in input dataframe."
+                f"Timestamp column `{timestamp_column}` not found in input dataframe."
             )
         # Convert timestamp column to string to make it JSON serializable
         input_dataframe[timestamp_column] = input_dataframe[
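As a hedged, standalone illustration of the validation above (the function and argument names here are ours, not the client's API; the real method goes on to JSON-serialize the timestamp column):

# Standalone sketch of the checks in extract_historical_features;
# `validate_inputs`, `required`, and `ts_col` are illustrative names.
import pandas as pd

def validate_inputs(df: pd.DataFrame, required: list, ts_col: str) -> None:
    if not set(required).issubset(df.columns):
        raise Exception(
            f"Input dataframe does not contain all the required features. "
            f"Required features: `{required}`. "
            f"Input dataframe columns: `{list(df.columns)}`"
        )
    if ts_col not in df.columns:
        raise Exception(
            f"Timestamp column `{ts_col}` not found in input dataframe."
        )

validate_inputs(pd.DataFrame({"uid": [1]}), ["uid"], "ts")
# -> Exception: Timestamp column `ts` not found in input dataframe.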
2 changes: 1 addition & 1 deletion fennel/client_tests/test_complex_autogen_extractor.py
@@ -257,7 +257,7 @@ def test_complex_auto_gen_extractors(client):
     )
     assert (
         str(e.value)
-        == "Dataset NumCompletedTripsDataset not found in sync call"
+        == "Dataset `NumCompletedTripsDataset` not found in sync call"
     )

     resp = client.sync(
4 changes: 2 additions & 2 deletions fennel/client_tests/test_invalid.py
@@ -97,7 +97,7 @@ def test_invalid_sync(self, client):
             )
         else:
             assert (
-                str(e.value) == "Dataset MemberActivityDatasetCopy "
+                str(e.value) == "Dataset `MemberActivityDatasetCopy` "
                 "not found in sync call"
             )

@@ -263,7 +263,7 @@ def test_no_access(self, client):
         else:
             assert (
                 str(e.value)
-                == """Dataset `MemberActivityDataset` is an input to the pipelines: `['copy']` but is not synced. Please add it to the sync call."""
+                == """Dataset `MemberActivityDataset` is an input to the pipelines: `['MemberActivityDatasetCopy.copy']` but is not synced. Please add it to the sync call."""
             )

     @mock
26 changes: 16 additions & 10 deletions fennel/datasets/datasets.py
@@ -1595,7 +1595,9 @@ def get_type(self, field) -> Type:
         elif field == self.timestamp:
             return datetime.datetime
         else:
-            raise Exception(f"field {field} not found in schema of {self.name}")
+            raise Exception(
+                f"field `{field}` not found in schema of `{self.name}`"
+            )

     def rename_column(self, old_name: str, new_name: str):
         if old_name in self.keys:
@@ -1606,7 +1608,7 @@ def rename_column(self, old_name: str, new_name: str):
             self.timestamp = new_name
         else:
             raise Exception(
-                f"field {old_name} not found in schema of {self.name}"
+                f"field `{old_name}` not found in schema of `{self.name}`"
             )

     def get_optional_cols(self) -> List[str]:
@@ -1621,23 +1623,25 @@ def drop_null_column(self, name: str):
             self.values[name] = fennel_get_optional_inner(self.values[name])
         elif name == self.timestamp:
             raise Exception(
-                f"cannot drop_null on timestamp field {name} of {self.name}"
+                f"cannot drop_null on timestamp field `{name}` of `{self.name}`"
             )
         else:
-            raise Exception(f"field {name} not found in schema of {self.name}")
+            raise Exception(
+                f"field `{name}` not found in schema of `{self.name}`"
+            )

     def append_value_column(self, name: str, type_: Type):
         if name in self.keys:
             raise Exception(
-                f"field {name} already exists in schema of {self.name}"
+                f"field `{name}` already exists in schema of `{self.name}`"
             )
         elif name in self.values:
             raise Exception(
-                f"field {name} already exists in schema of {self.name}"
+                f"field `{name}` already exists in schema of `{self.name}`"
             )
         elif name == self.timestamp:
             raise Exception(
-                f"cannot append timestamp field {name} to {self.name}"
+                f"cannot append timestamp field `{name}` to `{self.name}`"
             )
         else:
             self.values[name] = type_
@@ -1649,10 +1653,12 @@ def drop_column(self, name: str):
             self.values.pop(name)
         elif name == self.timestamp:
             raise Exception(
-                f"cannot drop timestamp field {name} from {self.name}"
+                f"cannot drop timestamp field `{name}` from `{self.name}`"
             )
         else:
-            raise Exception(f"field {name} not found in schema of {self.name}")
+            raise Exception(
+                f"field `{name}` not found in schema of `{self.name}`"
+            )

     def update_column(self, name: str, type: Type):
         if name in self.keys:
@@ -1661,7 +1667,7 @@ def update_column(self, name: str, type: Type):
             self.values[name] = type
         elif name == self.timestamp:
             raise Exception(
-                f"cannot assign timestamp field {name} from {self.name}"
+                f"cannot assign timestamp field `{name}` from `{self.name}`"
             )
         else:
             self.values[name] = type  # Add to values
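To see the reworded schema errors fire, a minimal stand-in for the schema object edited above (`MiniSchema` is an illustrative name; the real class lives in fennel/datasets/datasets.py and carries full Fennel types):

# Minimal stand-in reproducing the drop_column error paths above.
class MiniSchema:
    def __init__(self, name, keys, values, timestamp):
        self.name = name
        self.keys = keys          # key fields -> types
        self.values = values      # value fields -> types
        self.timestamp = timestamp

    def drop_column(self, name: str):
        if name in self.keys:
            self.keys.pop(name)
        elif name in self.values:
            self.values.pop(name)
        elif name == self.timestamp:
            raise Exception(
                f"cannot drop timestamp field `{name}` from `{self.name}`"
            )
        else:
            raise Exception(
                f"field `{name}` not found in schema of `{self.name}`"
            )

schema = MiniSchema("UserDS", {"uid": int}, {"age": int}, "ts")
schema.drop_column("agee")  # typo for "age"
# -> Exception: field `agee` not found in schema of `UserDS`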
2 changes: 1 addition & 1 deletion fennel/datasets/test_invalid_dataset.py
@@ -621,7 +621,7 @@ def create_pipeline(cls, a: Dataset):
     client.sync(datasets=[ABCDataset])
     assert (
         str(e.value)
-        == "Dataset `XYZ` is an input to the pipelines: `['create_pipeline']` but is not synced. Please add it to the sync call."
+        == "Dataset `XYZ` is an input to the pipelines: `['ABCDataset.create_pipeline']` but is not synced. Please add it to the sync call."
     )


2 changes: 1 addition & 1 deletion fennel/datasets/test_schema_validator.py
@@ -898,7 +898,7 @@ def pipeline_first(cls, hits: Dataset):

     assert (
         str(e.value)
-        == """field actor not found in schema of '[Dataset:SingleHits]'"""
+        == """field `actor` not found in schema of `'[Dataset:SingleHits]'`"""
     )


4 changes: 2 additions & 2 deletions fennel/featuresets/featureset.py
@@ -381,14 +381,14 @@ def extract(
                 ds = field.dataset
             else:
                 raise ValueError(
-                    f"Dataset {field.dataset_name} not found for field {field}"
+                    f"Dataset `{field.dataset_name}` not found for field `{field}`"
                 )

         for k in ds.dsschema().keys:  # type: ignore
             feature = provider.feature(k)
             if not feature:
                 raise ValueError(
-                    f"Dataset key {k} not found in provider {provider._name} for extractor {name}"
+                    f"Dataset key `{k}` not found in provider `{provider._name}` for extractor `{name}`"
                 )
             provider_features.append(feature)

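The provider-key check above, isolated as a hedged sketch (the dict-based `provider` and every name here are illustrative; in Fennel the provider is a featureset and the lookup is provider.feature(k)):

# Isolated sketch of the provider-key validation in extract().
def collect_key_features(ds_keys, provider, provider_name, extractor_name):
    provider_features = []
    for k in ds_keys:
        feature = provider.get(k)  # stand-in for provider.feature(k)
        if not feature:
            raise ValueError(
                f"Dataset key `{k}` not found in provider "
                f"`{provider_name}` for extractor `{extractor_name}`"
            )
        provider_features.append(feature)
    return provider_features

collect_key_features(["user_id"], {}, "UserRequest", "_fennel_lookup_age")
# -> ValueError: Dataset key `user_id` not found in provider
#    `UserRequest` for extractor `_fennel_lookup_age`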
2 changes: 1 addition & 1 deletion fennel/featuresets/test_invalid_derived_extractors.py
@@ -163,5 +163,5 @@ class UserInfo8:

     assert (
         str(e.value)
-        == "Dataset key user_id not found in provider UserRequest for extractor _fennel_lookup_age"
+        == "Dataset key `user_id` not found in provider `UserRequest` for extractor `_fennel_lookup_age`"
     )
20 changes: 11 additions & 9 deletions fennel/test_lib/mock_client.py
@@ -83,7 +83,7 @@ def dataset_lookup_impl(
 ) -> Tuple[pd.DataFrame, pd.Series]:
     if cls_name not in datasets:
         raise ValueError(
-            f"Dataset {cls_name} not found, please ensure it is synced."
+            f"Dataset `{cls_name}` not found, please ensure it is synced."
         )
     if allowed_datasets is not None and cls_name not in allowed_datasets:
         raise ValueError(
@@ -271,7 +271,7 @@ def __init__(self):

     def get_dataset_df(self, dataset_name: str) -> pd.DataFrame:
         if dataset_name not in self.dataset_info:
-            raise ValueError(f"Dataset {dataset_name} not found")
+            raise ValueError(f"Dataset `{dataset_name}` not found")

         # If we haven't seen any values for this dataset, return an empty df with the right schema.
         if (
@@ -390,7 +390,7 @@ def sync(
         for pipeline in selected_pipelines:
             for input in pipeline.inputs:
                 input_datasets_for_pipelines[input._name].append(
-                    pipeline.name
+                    f"{pipeline._dataset_name}.{pipeline.name}"
                 )
                 self.listeners[input._name].append(pipeline)

@@ -422,7 +422,7 @@
         for dataset in datasets:
             if dataset not in self.dataset_requests:
                 raise ValueError(
-                    f"Dataset {dataset} not found in sync call"
+                    f"Dataset `{dataset}` not found in sync call"
                 )
         self.extractors.extend(
             [
@@ -608,18 +608,20 @@ def _process_data_connector(self, dataset: Dataset, tier):

     def _internal_log(self, dataset_name: str, df: pd.DataFrame):
         if df.shape[0] == 0:
-            print(f"Skipping log of empty dataframe for webhook {dataset_name}")
+            print(
+                f"Skipping log of empty dataframe for webhook `{dataset_name}`"
+            )
             return FakeResponse(200, "OK")

         if dataset_name not in self.dataset_requests:
-            return FakeResponse(404, f"Dataset {dataset_name} not found")
+            return FakeResponse(404, f"Dataset `{dataset_name}` not found")

         dataset_req = self.dataset_requests[dataset_name]
         timestamp_field = self.dataset_info[dataset_name].timestamp_field
         if timestamp_field not in df.columns:
             return FakeResponse(
                 400,
-                f"Timestamp field {timestamp_field} not found in dataframe "
+                f"Timestamp field `{timestamp_field}` not found in dataframe "
                 f"while logging to dataset `{dataset_name}`",
             )
         for col in df.columns:
@@ -741,7 +743,7 @@ def _run_extractors(
         for feature_str in extractor.output_features:
             feature_str = f"{extractor.featureset}.{feature_str}"
             if feature_str not in feature_schema:
-                raise ValueError(f"Feature {feature_str} not found")
+                raise ValueError(f"Feature `{feature_str}` not found")
             dtype = feature_schema[feature_str]
             fields.append(Field(name=feature_str, dtype=dtype))
         dsschema = DSSchema(
@@ -1072,7 +1074,7 @@ def cast_df_to_schema(df: pd.DataFrame, dsschema: DSSchema) -> pd.DataFrame:
     for f in fields:
         if f.name not in df.columns:
             raise ValueError(
-                f"Field {f.name} not found in dataframe while logging to dataset"
+                f"Field `{f.name}` not found in dataframe while logging to dataset"
             )
         try:
             series = cast_col_to_dtype(df[f.name], f.dtype)
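One change in this file goes beyond backticks: pipelines in the "not synced" error are now reported with their dataset prefix, e.g. `MemberActivityDatasetCopy.copy` instead of the bare `copy`. A small sketch of that bookkeeping (this `Pipeline` stand-in is ours, not the mock client's class):

# Sketch of the qualified pipeline-name bookkeeping in sync().
from collections import defaultdict
from dataclasses import dataclass

@dataclass
class Pipeline:
    _dataset_name: str
    name: str

input_datasets_for_pipelines = defaultdict(list)
pipe = Pipeline("MemberActivityDatasetCopy", "copy")
for input_name in ["MemberActivityDataset"]:
    # Previously appended just pipe.name; now qualified with the dataset.
    input_datasets_for_pipelines[input_name].append(
        f"{pipe._dataset_name}.{pipe.name}"
    )

print(input_datasets_for_pipelines["MemberActivityDataset"])
# -> ['MemberActivityDatasetCopy.copy']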
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "fennel-ai"
-version = "0.18.18"
+version = "0.18.19"
 description = "The modern realtime feature engineering platform"
 authors = ["Fennel AI <[email protected]>"]
 packages = [{ include = "fennel" }]
