Skip to content

Commit

Permalink
meta: Allow owner to be specified at the file level itself
Browse files Browse the repository at this point in the history
  • Loading branch information
aditya-nambiar committed Nov 12, 2023
1 parent 10e5ea9 commit 819d288
Show file tree
Hide file tree
Showing 11 changed files with 46 additions and 40 deletions.
3 changes: 3 additions & 0 deletions fennel/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## [0.18.15] - 2023-11-12
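- Allow the owner to be specified once at the file (module) level via a module-level `__owner__` variable, instead of repeating `@meta(owner=...)` on every dataset and featureset. An owner set explicitly through `@meta` on an object still takes precedence.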

## [0.18.14] - 2023-11-11
- Use pandas (pd) types rather than Python types

Expand Down
14 changes: 1 addition & 13 deletions fennel/client_tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

webhook = sources.Webhook(name="fennel_webhook")

__owner__ = "[email protected]"

################################################################################
# Datasets
Expand All @@ -54,7 +55,6 @@ class NotionDocs:
creation_timestamp: datetime


@meta(owner="[email protected]")
@source(s3.bucket("engagement", prefix="coda"))
@dataset
class CodaDocs:
Expand All @@ -65,7 +65,6 @@ class CodaDocs:
creation_timestamp: datetime


@meta(owner="[email protected]")
@source(s3.bucket("engagement", prefix="google"))
@dataset
class GoogleDocs:
Expand All @@ -76,7 +75,6 @@ class GoogleDocs:
creation_timestamp: datetime


@meta(owner="[email protected]")
@dataset
class Document:
doc_id: int = field(key=True).meta(owner="[email protected]") # type: ignore
Expand Down Expand Up @@ -157,7 +155,6 @@ def get_content_features(df: pd.DataFrame) -> pd.DataFrame:
]


@meta(owner="[email protected]")
@dataset
class DocumentContentDataset:
doc_id: int = field(key=True)
Expand Down Expand Up @@ -185,7 +182,6 @@ def content_features(cls, ds: Dataset):
)


@meta(owner="[email protected]")
@dataset
class TopWordsCount:
word: str = field(key=True)
Expand All @@ -211,7 +207,6 @@ def top_words_count(cls, ds: Dataset):
) # type: ignore


@meta(owner="[email protected]")
@source(biq_query.table("user_activity", cursor="timestamp"), every="1h")
@dataset
class UserActivity:
Expand All @@ -222,7 +217,6 @@ class UserActivity:
timestamp: datetime


@meta(owner="[email protected]")
@dataset
class UserEngagementDataset:
user_id: int = field(key=True)
Expand Down Expand Up @@ -272,7 +266,6 @@ def create_short_click(df: pd.DataFrame) -> pd.DataFrame:
)


@meta(owner="[email protected]")
@dataset
class DocumentEngagementDataset:
doc_id: int = field(key=True)
Expand Down Expand Up @@ -314,14 +307,12 @@ def doc_engagement_pipeline(cls, ds: Dataset):
################################################################################


@meta(owner="[email protected]")
@featureset
class Query:
doc_id: int = feature(id=1)
user_id: int = feature(id=2)


@meta(owner="[email protected]")
@featureset
class UserBehaviorFeatures:
user_id: int = feature(id=1)
Expand All @@ -340,7 +331,6 @@ def get_user_features(cls, ts: pd.Series, user_id: pd.Series):
return df


@meta(owner="[email protected]")
@featureset
class DocumentFeatures:
doc_id: int = feature(id=1)
Expand All @@ -360,7 +350,6 @@ def get_doc_features(cls, ts: pd.Series, doc_id: pd.Series):
return df


@meta(owner="[email protected]")
@featureset
class DocumentContentFeatures:
doc_id: int = feature(id=1)
Expand All @@ -380,7 +369,6 @@ def get_features(cls, ts: pd.Series, doc_id: pd.Series):
return df


@meta(owner="[email protected]")
@featureset
class TopWordsFeatures:
word: str = feature(id=1)
Expand Down
10 changes: 10 additions & 0 deletions fennel/datasets/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

import fennel.sources as sources
from fennel.lib.aggregate import AggregateType

from fennel.lib.aggregate.aggregate import (
Average,
Count,
Expand All @@ -47,6 +48,7 @@
from fennel.lib.includes import TierSelector
from fennel.lib.metadata import (
meta,
OWNER,
get_meta_attr,
set_meta_attr,
)
Expand Down Expand Up @@ -909,13 +911,19 @@ def _create_dataset(
if struct_code:
setattr(dataset_cls, FENNEL_STRUCT_SRC_CODE, struct_code)

cls_module = inspect.getmodule(dataset_cls)
owner = None
if cls_module is not None and hasattr(cls_module, OWNER):
owner = getattr(cls_module, OWNER)

return Dataset(
dataset_cls,
fields,
history=duration_to_timedelta(history),
lookup_fn=_create_lookup_function(
dataset_cls.__name__, key_fields, struct_types # type: ignore
),
owner=owner,
)

def wrap(c: Type[T]) -> Dataset:
Expand Down Expand Up @@ -1148,6 +1156,7 @@ def __init__(
fields: List[Field],
history: datetime.timedelta,
lookup_fn: Optional[Callable] = None,
owner: Optional[str] = None,
):
super().__init__()
self._name = cls.__name__ # type: ignore
Expand All @@ -1168,6 +1177,7 @@ def __init__(
self.lookup = lookup_fn # type: ignore
self._add_fields_as_attributes()
self.expectations = self._get_expectations()
setattr(self, OWNER, owner)

def __class_getitem__(cls, item):
return item
Expand Down
11 changes: 5 additions & 6 deletions fennel/datasets/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@
from fennel.test_lib import *

webhook = Webhook(name="fennel_webhook")
__owner__ = "[email protected]"


@meta(owner="[email protected]")
@source(webhook.endpoint("UserInfoDataset"))
@dataset
class UserInfoDataset:
user_id: int = field(key=True)
user_id: int = field(key=True).meta(owner="[email protected]") # type: ignore
name: str
gender: str
# Users date of birth
Expand All @@ -45,7 +45,7 @@ def test_simple_dataset():
"datasets": [
{
"name": "UserInfoDataset",
"metadata": {"owner": "[email protected]"},
"metadata": {"owner": "[email protected]"},
"dsschema": {
"keys": {
"fields": [
Expand Down Expand Up @@ -79,7 +79,7 @@ def test_simple_dataset():
"name": {},
"account_creation_date": {},
"country": {},
"user_id": {},
"user_id": {"owner": "[email protected]"},
"gender": {},
"timestamp": {},
"dob": {"description": "Users date of birth"},
Expand Down Expand Up @@ -188,7 +188,6 @@ def create_aggregated_dataset(cls, user_info: Dataset):


@source(webhook.endpoint("Activity"))
@meta(owner="[email protected]")
@dataset(history="120d")
class Activity:
user_id: int
Expand All @@ -207,7 +206,7 @@ def test_dataset_with_retention():
"datasets": [
{
"name": "Activity",
"metadata": {"owner": "[email protected]"},
"metadata": {"owner": "[email protected]"},
"dsschema": {
"keys": {},
"values": {
Expand Down
4 changes: 2 additions & 2 deletions fennel/datasets/test_dataset_lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@

webhook = Webhook(name="fennel_webhook")

__owner__ = "[email protected]"


@meta(owner="[email protected]")
@source(webhook.endpoint("UserInfoDataset"))
@dataset
class UserInfoDataset:
Expand Down Expand Up @@ -52,7 +53,6 @@ def fake_func(
def test_dataset_lookup():
fennel.datasets.datasets.dataset_lookup = fake_func

@meta(owner="[email protected]")
@featureset
class UserAgeFeatures:
userid: int = feature(id=1)
Expand Down
14 changes: 9 additions & 5 deletions fennel/featuresets/featureset.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from fennel.lib.includes import FENNEL_INCLUDED_MOD
from fennel.lib.metadata import (
meta,
OWNER,
get_meta_attr,
set_meta_attr,
)
Expand Down Expand Up @@ -120,11 +121,11 @@ def featureset(featureset_cls: Type[T]):
setattr(featureset_cls, FENNEL_VIRTUAL_FILE, file_name)
except Exception:
pass

return Featureset(
featureset_cls,
features,
)
cls_module = inspect.getmodule(featureset_cls)
owner = None
if cls_module is not None and hasattr(cls_module, OWNER):
owner = getattr(cls_module, OWNER)
return Featureset(featureset_cls, features, owner)


def extractor(
Expand Down Expand Up @@ -462,11 +463,13 @@ class Featureset:
_extractors: List[Extractor]
_id_to_feature: Dict[int, Feature] = {}
_expectation: Expectations
owner: Optional[str] = None

def __init__(
self,
featureset_cls: Type[T],
features: List[Feature],
owner: Optional[str] = None,
):
self.__fennel_original_cls__ = featureset_cls
self._name = featureset_cls.__name__
Expand All @@ -478,6 +481,7 @@ def __init__(
self._add_feature_names_as_attributes()
self._set_extractors_as_attributes()
self._expectation = self._get_expectations()
setattr(self, OWNER, owner)
propogate_fennel_attributes(featureset_cls, self)

def get_dataset_dependencies(self):
Expand Down
1 change: 1 addition & 0 deletions fennel/lib/metadata/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from fennel.lib.metadata.metadata import (
meta,
Metadata,
OWNER,
get_meta_attr,
set_meta_attr,
get_metadata_proto,
Expand Down
12 changes: 9 additions & 3 deletions fennel/lib/metadata/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

EMAIL_REGEX = r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$"
META_FIELD = "__fennel_metadata__"
OWNER = "__owner__"


class Metadata(BaseModel):
Expand Down Expand Up @@ -102,12 +103,17 @@ def set_meta_attr(obj: Any, attr: str, value: Any):
setattr(obj, META_FIELD, meta)


def get_metadata_proto(obj: Any) -> proto.Metadata:
def get_metadata_proto(
obj: Any,
) -> proto.Metadata:
owner = getattr(obj, OWNER, "")
meta = get_meta(obj)
if meta is None:
return proto.Metadata()
return proto.Metadata(owner=owner)
if meta.owner != "":
owner = meta.owner
return proto.Metadata(
owner=meta.owner,
owner=owner,
description=meta.description,
tags=meta.tags,
deprecated=meta.deprecated,
Expand Down
9 changes: 5 additions & 4 deletions fennel/lib/to_proto/to_proto.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
duration_to_timedelta,
)
from fennel.lib.includes import FENNEL_INCLUDED_MOD
from fennel.lib.metadata import get_metadata_proto, get_meta_attr
from fennel.lib.metadata import get_metadata_proto, get_meta_attr, OWNER
from fennel.lib.schema import get_datatype, FENNEL_STRUCT
from fennel.lib.to_proto import Serializer
from fennel.lib.to_proto.source_code import (
Expand Down Expand Up @@ -172,7 +172,6 @@ def dataset_to_proto(ds: Dataset) -> ds_proto.CoreDataset:
from fennel.datasets.datasets import dataset_lookup
"""
)

return ds_proto.CoreDataset(
name=ds.__name__,
metadata=get_metadata_proto(ds),
Expand Down Expand Up @@ -468,9 +467,11 @@ def _check_owner_exists(obj):
owner = get_meta_attr(obj, "owner")
if owner is None or owner == "":
if isinstance(obj, Featureset):
raise Exception(f"Featureset {obj._name} must have an owner.")
if not hasattr(obj, OWNER) or getattr(obj, OWNER) is None:
raise Exception(f"Featureset {obj._name} must have an owner.")
elif isinstance(obj, Dataset):
raise Exception(f"Dataset {obj._name} must have an owner.")
if not hasattr(obj, OWNER) or getattr(obj, OWNER) is None:
raise Exception(f"Dataset {obj._name} must have an owner.")
else:
raise Exception(f"Object {obj.__name__} must have an owner.")

Expand Down
6 changes: 0 additions & 6 deletions fennel/test_lib/integration_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,6 @@

try:
import pyarrow as pa
import sys

sys.path.insert(
0,
"/nix/store/wrkjic4qykdb8gkg271b388cdqhzxf7d-python3-3.11.5-env/lib/python3.11/site-packages",
)
from fennel_client_lib import RustClient # type: ignore
from fennel_dataset import lookup # type: ignore
except ImportError:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "fennel-ai"
version = "0.18.14"
version = "0.18.15"
description = "The modern realtime feature engineering platform"
authors = ["Fennel AI <[email protected]>"]
packages = [{ include = "fennel" }]
Expand Down

0 comments on commit 819d288

Please sign in to comment.