Skip to content

Commit

Permalink
fix: add dangling repo cleanup to hf repos (#891)
Browse files Browse the repository at this point in the history
* feat: add clean_hf script to all.sh
  • Loading branch information
NiklasKoehneckeAA authored Jun 11, 2024
1 parent 19f5ad0 commit 9a9afcb
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 5 deletions.
1 change: 1 addition & 0 deletions scripts/all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ set +a
./scripts/doctest.sh
./scripts/notebook_runner.sh
./scripts/test.sh
# Runs after the test scripts: clean_hf.py deletes dangling Hugging Face
# datasets. It is resolved relative to this script's own directory.
python "$(dirname "$0")/clean_hf.py"
23 changes: 23 additions & 0 deletions scripts/clean_hf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import os
import warnings

from dotenv import load_dotenv
from huggingface_hub import HfApi # type: ignore


def clean_up_dangling_hf_repos(hugging_face_token: str) -> None:
    """Delete dangling ``IL-temp-tests`` datasets of the ``Aleph-Alpha`` author.

    Lists the datasets on the Hugging Face Hub that match the author and
    dataset-name filter. If any are found, a warning is emitted and each
    one is deleted.

    Args:
        hugging_face_token: Token used to authenticate the Hub API client.
    """
    hub = HfApi(token=hugging_face_token)
    # Materialize the listing so it can be checked for emptiness and then
    # iterated for deletion.
    leftovers = list(
        hub.list_datasets(author="Aleph-Alpha", dataset_name="IL-temp-tests")
    )
    if not leftovers:
        return

    warnings.warn("dangling hf datasets found, attempting to delete")
    for leftover in leftovers:
        # missing_ok=True: presumably tolerates a repo that is already gone
        # by the time we try to delete it (see HfApi.delete_repo docs).
        hub.delete_repo(leftover.id, repo_type="dataset", missing_ok=True)


if __name__ == "__main__":
    # Load variables from a local .env file (if any) into the environment so
    # HUGGING_FACE_TOKEN can be supplied without exporting it in the shell.
    load_dotenv()
    token = os.getenv("HUGGING_FACE_TOKEN")
    # Validate explicitly instead of using `assert`: assertions are removed
    # when Python runs with -O, which would let a missing token slip through
    # to the Hub client. An empty value is rejected as well.
    if not token:
        raise SystemExit(
            "HUGGING_FACE_TOKEN is not set; cannot clean up Hugging Face repos."
        )
    clean_up_dangling_hf_repos(token)
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from uuid import uuid4

import huggingface_hub # type: ignore

from intelligence_layer.evaluation.infrastructure.hugging_face_repository import (
Expand All @@ -8,15 +6,15 @@


def test_hugging_face_repository_can_create_and_delete_a_repository(
hugging_face_token: str,
hugging_face_token: str, hugging_face_test_repository_id: str
) -> None:
repository_id = f"Aleph-Alpha/test-{uuid4()}"
repository_id = hugging_face_test_repository_id + "unused-suffix"

assert not huggingface_hub.repo_exists(
repo_id=repository_id,
token=hugging_face_token,
repo_type="dataset",
), f"This is very unlikely but it seems that the repository with the ID {repository_id} already exists."
), f"The repository with the ID {repository_id} already exists. Try to run the clean_hf script."

created_repository = HuggingFaceRepository(
repository_id=repository_id,
Expand Down

0 comments on commit 9a9afcb

Please sign in to comment.