-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
extractors: Add dtype validation for autogenerated extractors (#310)
* extractors: Add dtype validation for autogenerated extractors * Handle optional fields in ds-fs lookups
- Loading branch information
1 parent
e076671
commit 7822015
Showing
13 changed files
with
222 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,7 +57,7 @@ class UserInfo: | |
default="unspecified", | ||
) | ||
# lookup with meta | ||
age_years: float = ( | ||
age_years: int = ( | ||
feature(id=3) | ||
.extract( | ||
field=UserInfoDataset.age, | ||
|
@@ -101,7 +101,7 @@ class AgeInfo: | |
# alias a feature that has an explicit extractor | ||
age_group: AgeGroup = feature(id=1).extract(feature=UserInfo.age_group) | ||
# alias a feature that has a derived extractor | ||
age: float = feature(id=2).extract(feature=UserInfo.age_years) | ||
age: int = feature(id=2).extract(feature=UserInfo.age_years) | ||
|
||
view = InternalTestClient() | ||
view.add(UserInfoDataset) | ||
|
@@ -168,7 +168,7 @@ def test_feature(actual_feature, expected_dict): | |
{ | ||
"id": 3, | ||
"name": "age_years", | ||
"dtype": {"double_type": {}}, | ||
"dtype": {"int_type": {}}, | ||
"metadata": { | ||
"owner": "[email protected]", | ||
"description": "lookup with meta", | ||
|
@@ -211,7 +211,7 @@ def test_feature(actual_feature, expected_dict): | |
{ | ||
"id": 2, | ||
"name": "age", | ||
"dtype": {"double_type": {}}, | ||
"dtype": {"int_type": {}}, | ||
"metadata": { | ||
"description": "alias a feature that has a derived extractor" | ||
}, | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
from datetime import datetime | ||
from typing import Optional, List | ||
import sys | ||
|
||
import pandas as pd | ||
import pytest | ||
|
@@ -8,11 +9,16 @@ | |
from fennel.datasets import dataset, field | ||
from fennel.featuresets import featureset, extractor, feature | ||
from fennel.lib.schema import inputs, outputs | ||
from fennel.sources import source, Webhook | ||
|
||
# noinspection PyUnresolvedReferences | ||
from fennel.test_lib import * | ||
|
||
__owner__ = "[email protected]" | ||
webhook = Webhook(name="fennel_webhook") | ||
|
||
|
||
@source(webhook.endpoint("UserInfoDataset")) | ||
@dataset | ||
class UserInfoDataset: | ||
user_id: int = field(key=True) | ||
|
@@ -21,6 +27,7 @@ class UserInfoDataset: | |
# User's date of birth | ||
dob: str | ||
age: int | ||
ids: List[int] | ||
account_creation_date: datetime | ||
country: Optional[str] | ||
timestamp: datetime = field(timestamp=True) | ||
|
@@ -238,3 +245,114 @@ class UserInfo: | |
str(e.value) | ||
== "Feature `extractors` in `UserInfo` has a reserved name `extractors`." | ||
) | ||
|
||
|
||
@featureset | ||
class UserInfo2: | ||
user_id: int = feature(id=1) | ||
home_geoid: int = feature(id=2) | ||
age: int = feature(id=3).extract(field=UserInfoDataset.age, default=0) # type: ignore | ||
credit_score: int = feature(id=4) | ||
|
||
|
||
@mock | ||
def test_tp(client): | ||
client.sync(datasets=[UserInfoDataset], featuresets=[UserInfo2]) | ||
|
||
|
||
@mock | ||
def test_invalid_autogenerated_extractors(client): | ||
with pytest.raises(TypeError) as e: | ||
|
||
@featureset | ||
class UserInfo: | ||
user_id: int = feature(id=1) | ||
home_geoid: int = feature(id=2) | ||
age: int = feature(id=3).extract(field=UserInfoDataset.age) | ||
credit_score: int = feature(id=4) | ||
|
||
# age should be Optional[int] | ||
if sys.version_info < (3, 9): | ||
assert ( | ||
str(e.value) | ||
== "Feature `UserInfo.age` has type `<class 'int'>` but expectected type `typing.Union[int, NoneType]`" | ||
) | ||
else: | ||
assert ( | ||
str(e.value) | ||
== "Feature `UserInfo.age` has type `<class 'int'>` but expectected type `typing.Optional[int]`" | ||
) | ||
|
||
with pytest.raises(TypeError) as e: | ||
|
||
@featureset | ||
class UserInfo1: | ||
user_id: int = feature(id=1) | ||
home_geoid: int = feature(id=2) | ||
age: float = feature(id=3).extract( | ||
field=UserInfoDataset.age, default=0 | ||
) | ||
credit_score: int = feature(id=4) | ||
|
||
# age should be int | ||
assert ( | ||
str(e.value) | ||
== "Feature `UserInfo1.age` has type `<class 'float'>` but expected type `<class 'int'>`." | ||
) | ||
|
||
with pytest.raises(TypeError) as e: | ||
|
||
@featureset | ||
class UserInfo2: | ||
home_geoid: int = feature(id=2) | ||
age: int = feature(id=3) | ||
credit_score: int = feature(id=4) | ||
|
||
@featureset | ||
class UserInfo2: | ||
user_id: int = feature(id=1) | ||
home_geoid: float = feature(id=2).extract( | ||
feature=UserInfo2.home_geoid | ||
) | ||
age: int = feature(id=3) | ||
credit_score: int = feature(id=4) | ||
|
||
# home_geoid should be int | ||
assert ( | ||
str(e.value) | ||
== "Feature `UserInfo2.home_geoid` has type `<class 'float'>` but the extractor aliasing `UserInfo2.home_geoid` has input type `<class 'int'>`." | ||
) | ||
|
||
with pytest.raises(ValueError) as e: | ||
|
||
@featureset | ||
class UserInfo3: | ||
user_id: int = feature(id=1) | ||
home_geoid: int = feature(id=2) | ||
age: int = feature(id=4).extract( | ||
field=UserInfoDataset.age, default=0.0 | ||
) | ||
credit_score: int = feature(id=5) | ||
|
||
# default value for age should be 0 | ||
assert ( | ||
str(e.value) | ||
== "Default value `0.0` for feature `UserInfo3.age` has incorrect default value: Value `0.0` does not match type `int`" | ||
) | ||
|
||
with pytest.raises(ValueError) as e: | ||
|
||
@featureset | ||
class UserInfo4: | ||
user_id: int = feature(id=1) | ||
home_geoid: int = feature(id=2) | ||
country: str = feature(id=4).extract( | ||
field=UserInfoDataset.country, default=0.0 | ||
) | ||
credit_score: int = feature(id=5) | ||
|
||
# default value for country should match Optional[str] | ||
assert ( | ||
str(e.value) | ||
== "Default value `0.0` for feature `UserInfo4.country` has incorrect default value: Value `0.0` does not match type `Optional[str]`" | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
FENNEL_GEN_CODE_MARKER = "__fennel_gen_code__" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,4 +22,5 @@ | |
FENNEL_STRUCT, | ||
FENNEL_STRUCT_DEPENDENCIES_SRC_CODE, | ||
FENNEL_STRUCT_SRC_CODE, | ||
validate_value_matches_type, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,3 +10,4 @@ | |
to_duration_proto, | ||
to_sync_request_proto, | ||
) | ||
from fennel.lib.to_proto.source_code import FENNEL_GEN_CODE_MARKER |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.