Skip to content
This repository has been archived by the owner on Apr 15, 2022. It is now read-only.

Commit

Permalink
docs
Browse files Browse the repository at this point in the history
  • Loading branch information
Ben-Epstein committed Jan 12, 2021
1 parent f0b54fe commit f243e0f
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 6 deletions.
11 changes: 8 additions & 3 deletions splicemachine/features/feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
class FeatureStore:
def __init__(self, splice_ctx: PySpliceContext) -> None:
self.splice_ctx = splice_ctx
self.mlflow_ctx = None
self.feature_sets = [] # Cache of newly created feature sets

def register_splice_context(self, splice_ctx: PySpliceContext) -> None:
Expand Down Expand Up @@ -166,7 +167,7 @@ def get_feature_vector(self, features: List[Union[str, Feature]],
Gets a feature vector given a list of Features and primary key values for their corresponding Feature Sets
:param features: List of str Feature names or Features
:param join_key_values: (dict) join key vals to get the proper Feature values formatted as {join_key_column_name: join_key_value}
:param join_key_values: (dict) join key values to get the proper Feature values formatted as {join_key_column_name: join_key_value}
:param return_sql: Whether to return the SQL needed to get the vector or the values themselves. Default False
:return: Pandas Dataframe or str (SQL statement)
"""
Expand Down Expand Up @@ -735,12 +736,16 @@ def __log_mlflow_results(self, name, rounds, mlflow_results):
:param name: MLflow run name
:param rounds: Number of rounds of feature elimination that were run
:param mlflow_results: The params / metrics to log
:return:
"""
with self.mlflow_ctx.start_run(run_name=name):
try:
if self.mlflow_ctx.active_run():
self.mlflow_ctx.start_run(run_name=name)
for r in range(rounds):
with self.mlflow_ctx.start_run(run_name=f'Round {r}', nested=True):
self.mlflow_ctx.log_metrics(mlflow_results[r])
finally:
self.mlflow_ctx.end_run()


def __prune_features_for_elimination(self, features) -> List[Feature]:
"""
Expand Down
7 changes: 4 additions & 3 deletions splicemachine/spark/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,16 +124,17 @@ def replaceDataframeSchema(self, dataframe, schema_table_name):

def fileToTable(self, file_path, schema_table_name, primary_keys=None, drop_table=False, **pandas_args):
"""
Load a file from the local filesystem and create a new table (or recreate an existing table), and load the data
from the file into the new table
Load a file from the local filesystem or from a remote location and create a new table
(or recreate an existing table), and load the data from the file into the new table. Any file_path that can be
read by pandas should work here.
:param file_path: The local file to load
:param schema_table_name: The schema.table name
:param primary_keys: List[str] of primary keys for the table. Default None
:param drop_table: Whether or not to drop the table. If this is False and the table already exists, the
function will fail. Default False
:param pandas_args: Extra parameters to be passed into the pd.read_csv function. Any parameters accepted
in pd.read_csv will work here
in pd.read_csv will work here
:return: None
"""
import pandas as pd
Expand Down

0 comments on commit f243e0f

Please sign in to comment.