Skip to content
This repository has been archived by the owner on Apr 15, 2022. It is now read-only.

Commit

Permalink
Dashboard update (#123)
Browse files Browse the repository at this point in the history
* tests passing, airflow managed

* better function name

* cleaner function

* update tests

* table name, not schema

* get deployments from feature or feature set

* update feature metadata

* most recent features and most used features

* comment
  • Loading branch information
Ben Epstein authored Apr 5, 2021
1 parent 80519d0 commit 00ec0bb
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 8 deletions.
46 changes: 39 additions & 7 deletions feature_store/src/rest_api/crud.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,22 @@

from sqlalchemy.orm import Session, aliased, load_only
from typing import List, Dict, Union, Optional, Any, Tuple, Set
from sqlalchemy.orm import Session, aliased
from typing import List, Dict, Union, Any, Tuple, Set
from . import schemas
from .constants import SQL, SQLALCHEMY_TYPES
from .constants import SQL
from shared.models import feature_store_models as models
from shared.services.database import SQLAlchemyClient, DatabaseFunctions
from shared.logger.logging_config import logger
from fastapi import status
import re
import json
from datetime import datetime
from sqlalchemy import update, Integer, String, func, distinct, cast, and_, Column, event, DateTime, literal_column, text
from sqlalchemy import desc, update, String, func, distinct, cast, and_, Column, literal_column, text
from .utils.utils import (__get_pk_columns, get_pk_column_str, datatype_to_sql,
sql_to_datatype, _sql_to_sqlalchemy_columns, model_to_schema_feature,
__validate_feature_data_type, __validate_primary_keys)
from mlflow.store.tracking.dbmodels.models import SqlRun, SqlTag, SqlParam
from sqlalchemy.schema import MetaData, Table, PrimaryKeyConstraint, DDL
from sqlalchemy.types import (CHAR, VARCHAR, DATE, TIME, TIMESTAMP, BLOB, CLOB, TEXT, BIGINT,
DECIMAL, FLOAT, INTEGER, NUMERIC, REAL, SMALLINT, BOOLEAN)
from sqlalchemy.types import TIMESTAMP
from shared.api.exceptions import SpliceMachineException, ExceptionCodes


Expand Down Expand Up @@ -585,6 +584,34 @@ def _get_num_created_models(db) -> int:
filter(SqlTag.key=='splice.model_name').\
count()

def get_recent_features(db: Session, n: int = 5) -> List[str]:
    """
    Returns the names of the n features with the newest `last_update_ts`

    :param db: Session used to run the query
    :param n: Maximum number of feature names to return. Default 5
    :return: List[str] Feature names, ordered newest `last_update_ts` first
    """
    newest_first = desc(models.Feature.last_update_ts)
    name_query = db.query(models.Feature.name).order_by(newest_first)
    rows = name_query.limit(n).all()
    # Only one column is selected, so every row is a 1-tuple holding the name
    return [feature_name for (feature_name,) in rows]

def get_most_used_features(db: Session, n: int = 5) -> List[str]:
    """
    Gets the top n most used features (where most used means in the most number of deployments)

    :param db: Session
    :param n: How many to return. Default 5
    :return: List[str] Feature Names, most used first
    """
    # Join Feature -> TrainingSetFeature -> Deployment so there is one row per
    # (deployment, training-set feature) pair; count() per feature name then reflects
    # how many deployments use that feature. Filtering Feature with IN (...) instead
    # would collapse that multiplicity and only count Feature rows per name.
    feature_count = db.query(models.Feature.name, func.count().label('feat_count')).\
        join(models.TrainingSetFeature,
             models.TrainingSetFeature.feature_id == models.Feature.feature_id).\
        join(models.Deployment,
             models.Deployment.training_set_id == models.TrainingSetFeature.training_set_id).\
        group_by(models.Feature.name).\
        subquery('feature_count')
    # Descending order: the highest counts must come first, otherwise limit(n)
    # would keep the least used features.
    rows = db.query(feature_count.c.name).\
        order_by(feature_count.c.feat_count.desc()).\
        limit(n).\
        all()
    # Only one column is selected, so every row is a 1-tuple holding the name
    return [feature_name for (feature_name,) in rows]

def get_fs_summary(db: Session) -> schemas.FeatureStoreSummary:
"""
This function returns a summary of the feature store including:
Expand Down Expand Up @@ -612,6 +639,9 @@ def get_fs_summary(db: Session) -> schemas.FeatureStoreSummary:
num_deployemnts = _get_num_deployments(db)
num_pending_feature_set_deployments = _get_num_pending_feature_sets(db)

recent_features = get_recent_features(db, 5)
most_used_features = get_most_used_features(db, 5)

return schemas.FeatureStoreSummary(
num_feature_sets=num_fsets,
num_deployed_feature_sets=num_deployed_fsets,
Expand All @@ -621,7 +651,9 @@ def get_fs_summary(db: Session) -> schemas.FeatureStoreSummary:
num_training_views=num_training_views,
num_models=num_created_models,
num_deployed_models=num_deployemnts,
num_pending_feature_set_deployments=num_pending_feature_set_deployments
num_pending_feature_set_deployments=num_pending_feature_set_deployments,
recent_features=recent_features,
most_used_features=most_used_features
)

def update_feature_metadata(db: Session, name: str, desc: str = None,
Expand Down
2 changes: 2 additions & 0 deletions feature_store/src/rest_api/routers/synchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ def get_summary(db: Session = Depends(crud.get_db)):
* Number of associated models - this is a count of the MLManager.RUNS table where the `splice.model_name` tag is set and the `splice.feature_store.training_set` parameter is set
* Number of active (deployed) models (that have used the feature store for training)
* Number of pending feature sets - this will will require a new table `featurestore.pending_feature_set_deployments` and it will be a count of that
* 5 Most newly added features
* 5 Most used features (across deployments)
"""
return crud.get_fs_summary(db)

Expand Down
3 changes: 2 additions & 1 deletion feature_store/src/rest_api/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ class Feature(FeatureBase):
class Config:
orm_mode = True


class FeatureDescription(Feature):
feature_set_name: Optional[str] = None

Expand Down Expand Up @@ -137,3 +136,5 @@ class FeatureStoreSummary(BaseModel):
num_models: int
num_deployed_models: int
num_pending_feature_set_deployments: int
recent_features: List[str]
most_used_features: List[str]

0 comments on commit 00ec0bb

Please sign in to comment.