Skip to content

Commit

Permalink
feat: run all ML pipelines using GA
Browse files Browse the repository at this point in the history
  • Loading branch information
iusztinpaul committed Nov 8, 2024
1 parent 8861695 commit 4fe884f
Show file tree
Hide file tree
Showing 12 changed files with 203 additions and 16 deletions.
11 changes: 10 additions & 1 deletion .github/workflows/feature_pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ jobs:
uses: astral-sh/setup-uv@v3
with:
version: 0.4.30

- name: Get notebook list
id: ml_pipelines
run: |
echo "notebooks=$(ls notebooks/*.ipynb | sort -n | jq -R -s -c 'split("\n")[:-1]')" >> $GITHUB_OUTPUT
strategy:
matrix:
notebook: ${{ fromJson(steps.notebooks.outputs.notebooks) }}

- name: Set up Python
run: uv python install
Expand All @@ -33,7 +42,7 @@ jobs:
uv sync --all-extras --dev
- name: Run pipeline
run: uv run ipython notebooks/1_feature_engineering.ipynb
# The matrix key declared under strategy.matrix is `notebook`.
run: uv run ipython ${{ matrix.notebook }}
env:
HOPSWORKS_API_KEY: ${{ secrets.HOPSWORKS_API_KEY }}

22 changes: 9 additions & 13 deletions notebooks/1_feature_engineering.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,13 @@
"\n",
"root_dir = str(Path().absolute().parent)\n",
"if root_dir not in sys.path:\n",
" sys.path.append(root_dir)"
" sys.path.append(root_dir)\n",
"\n",
"# Exit the notebook\n",
"print(\"BAAAAM\")\n",
"sys.exit(\"Exiting notebook\")\n",
"\n",
"\n"
]
},
{
Expand Down Expand Up @@ -143,12 +149,12 @@
"\n",
"import random\n",
"import polars as pl\n",
"import numpy as np\n",
"import torch\n",
"from sentence_transformers import SentenceTransformer\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"from recsys import utils\n",
"from recsys.features.articles import (\n",
" prepare_articles, \n",
" generate_embeddings_for_dataframe,\n",
Expand Down Expand Up @@ -187,17 +193,7 @@
}
],
"source": [
"import hopsworks\n",
"\n",
"# TODO: How to adapt this for GA and Modal?\n",
"HOPSWORKS_API_KEY = os.environ.get(\"HOPSWORKS_API_KEY\")\n",
"if HOPSWORKS_API_KEY:\n",
" print(\"Found Hopsworks API Key!\")\n",
" project = hopsworks.login(api_key_value=HOPSWORKS_API_KEY)\n",
"else:\n",
" project = hopsworks.login()\n",
"\n",
"fs = project.get_feature_store()"
"fs = utils.get_hopsworks_feature_store()"
]
},
{
Expand Down
6 changes: 5 additions & 1 deletion notebooks/2_train_retrieval_model.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@
"\n",
"root_dir = str(Path().absolute().parent)\n",
"if root_dir not in sys.path:\n",
" sys.path.append(root_dir)"
" sys.path.append(root_dir)\n",
"\n",
"# Exit the notebook\n",
"print(\"BAAAAM\")\n",
"sys.exit(\"Exiting notebook\")"
]
},
{
Expand Down
6 changes: 5 additions & 1 deletion notebooks/3_embeddings_creation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@
"\n",
"root_dir = str(Path().absolute().parent)\n",
"if root_dir not in sys.path:\n",
" sys.path.append(root_dir)"
" sys.path.append(root_dir)\n",
"\n",
"# Exit the notebook\n",
"print(\"BAAAAM\")\n",
"sys.exit(\"Exiting notebook\")"
]
},
{
Expand Down
18 changes: 18 additions & 0 deletions notebooks/4_train_ranking_model.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,24 @@
"In this notebook, you will train a ranking model using gradient boosted trees. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"from pathlib import Path\n",
"\n",
"root_dir = str(Path().absolute().parent)\n",
"if root_dir not in sys.path:\n",
" sys.path.append(root_dir)\n",
"\n",
"# Exit the notebook\n",
"print(\"BAAAAM\")\n",
"sys.exit(\"Exiting notebook\")"
]
},
{
"cell_type": "code",
"execution_count": 1,
Expand Down
18 changes: 18 additions & 0 deletions notebooks/5_create_deployments.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,24 @@
"**NOTE Currently the transformer scripts are not implemented.**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"from pathlib import Path\n",
"\n",
"root_dir = str(Path().absolute().parent)\n",
"if root_dir not in sys.path:\n",
" sys.path.append(root_dir)\n",
"\n",
"# Exit the notebook\n",
"print(\"BAAAAM\")\n",
"sys.exit(\"Exiting notebook\")"
]
},
{
"cell_type": "code",
"execution_count": 22,
Expand Down
19 changes: 19 additions & 0 deletions notebooks/7_job_scheduler.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,25 @@
"## <span style=\"color:#ff5f27\">🗓️ Job Scheduling </span>\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0dc1480f",
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"from pathlib import Path\n",
"\n",
"root_dir = str(Path().absolute().parent)\n",
"if root_dir not in sys.path:\n",
" sys.path.append(root_dir)\n",
"\n",
"# Exit the notebook\n",
"print(\"BAAAAM\")\n",
"sys.exit(\"Exiting notebook\")"
]
},
{
"cell_type": "code",
"execution_count": 1,
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ dependencies = [
"huggingface-hub==0.24.7",
"langchain-openai==0.1.14",
"langchain==0.2.6",
"loguru>=0.7.2",
"polars==1.9.0",
"sentence-transformers==2.2.2",
"streamlit==1.28.2",
Expand Down
16 changes: 16 additions & 0 deletions recsys/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import os
import hopsworks

from loguru import logger


def get_hopsworks_feature_store():
    """Log in to Hopsworks and return the project's feature store.

    If the ``HOPSWORKS_API_KEY`` environment variable is set to a non-empty
    value, it is passed to ``hopsworks.login`` as ``api_key_value``.
    Otherwise ``hopsworks.login()`` is called without arguments.

    Returns:
        The object returned by ``project.get_feature_store()``.
    """
    api_key = os.environ.get("HOPSWORKS_API_KEY")
    if api_key:
        logger.info("Logging in to Hopsworks using HOPSWORKS_API_KEY env var.")
        project = hopsworks.login(api_key_value=api_key)
    else:
        # An unset or empty variable falls through to the argument-less login.
        logger.info("Logging in to Hopsworks using cached API key.")
        project = hopsworks.login()

    return project.get_feature_store()
File renamed without changes.
78 changes: 78 additions & 0 deletions tools/clean_hopsworks_resources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import hopsworks

# Cleanup script: deletes the deployments, models, feature views and feature
# groups named below from the Hopsworks project returned by hopsworks.login().
# All work happens at module top level, in the order written.

# Log in to Hopsworks
project = hopsworks.login()


# Get the model serving handle (used to list deployments)
ms = project.get_model_serving()

# List all deployments
deployments = ms.get_deployments()

# Delete each deployment
for deployment in deployments:
    print(f"Deleting deployment: {deployment.name}.")
    deployment.delete()

# Get the model registry
mr = project.get_model_registry()

# Delete every version of each known model
for model_name in ["ranking_model", "candidate_model", "query_model"]:
    models = mr.get_models(name=model_name)

    # Delete each model
    for model in models:
        print(f"Deleting model: {model.name} (version: {model.version})")
        model.delete()


# Get feature store
fs = project.get_feature_store()


# NOTE(review): feature views are removed before feature groups; presumably
# because views depend on groups -- confirm before reordering.
for feature_view in [
    "retrieval",
    "articles",
    "customers",
    "candidate_embeddings",
    "ranking",
]:
    # Get all versions of this feature view; a failed lookup is treated as
    # "nothing to delete" so the script keeps going.
    try:
        feature_views = fs.get_feature_views(name=feature_view)
    except Exception:
        print(f"Couldn't find feature view: {feature_view}. Skipping...")
        feature_views = []

    # Delete each feature view (best effort)
    for fv in feature_views:
        print(f"Deleting feature view: {fv.name} (version: {fv.version})")
        try:
            fv.delete()
        except Exception:
            print(f"Failed to delete feature view {fv.name}.")

for feature_group in [
    "customers",
    "articles",
    "transactions",
    "interactions",
    "candidate_embeddings_fg",
    "ranking",
]:
    # Get all versions of this feature group; a failed lookup is treated as
    # "nothing to delete" so the script keeps going.
    try:
        feature_groups = fs.get_feature_groups(name=feature_group)
    except Exception:
        # Report the feature group being looked up, not the stale loop
        # variable left over from the feature-view loop above.
        print(f"Couldn't find feature group: {feature_group}. Skipping...")
        feature_groups = []

    # Delete each feature group (best effort)
    for fg in feature_groups:
        print(f"Deleting feature group: {fg.name} (version: {fg.version})")
        try:
            fg.delete()
        except Exception:
            # Use fg here: fv may be undefined or refer to an unrelated view.
            print(f"Failed to delete feature group {fg.name}.")
24 changes: 24 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 4fe884f

Please sign in to comment.