Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ludwig upload to push artifacts to HuggingFace Hub #3480

Merged
merged 10 commits into from
Jul 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions ludwig/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
from ludwig.models.registry import model_type_registry
from ludwig.schema.model_config import ModelConfig
from ludwig.types import ModelConfigDict, TrainingSetMetadataDict
from ludwig.upload import get_upload_registry
from ludwig.utils import metric_utils
from ludwig.utils.backward_compatibility import upgrade_config_dict_to_latest_version
from ludwig.utils.config_utils import get_preprocessing_params
Expand Down Expand Up @@ -1652,6 +1653,53 @@ def save(self, save_path: str) -> None:
training_set_metadata_path = os.path.join(save_path, TRAIN_SET_METADATA_FILE_NAME)
save_json(training_set_metadata_path, self.training_set_metadata)

def upload_to_hf_hub(
    self,
    repo_id: str,
    model_path: str,
    repo_type: str = "model",
    private: bool = False,
    commit_message: str = "Upload trained [Ludwig](https://ludwig.ai/latest/) model weights",
    commit_description: Optional[str] = None,
) -> bool:
    """Push the artifacts of a saved model to the HuggingFace Hub.

    The uploader registered under the `"hf_hub"` key of the upload registry
    is instantiated, `login()` is called on it, and all arguments are then
    forwarded unchanged to its `upload()` method.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        model_path (`str`):
            The path of the saved model. This is the top level directory where
            the models weights as well as other associated training artifacts
            are saved.
        repo_type (`str`, *optional*, defaults to `"model"`):
            Set to `"dataset"` or `"space"` if uploading to a dataset or
            space, `"model"` if uploading to a model.
        private (`bool`, *optional*, defaults to `False`):
            Whether the model repo should be private.
        commit_message (`str`, *optional*):
            The summary / title / first line of the generated commit. Defaults to
            `"Upload trained [Ludwig](https://ludwig.ai/latest/) model weights"`.
        commit_description (`str`, *optional*, defaults to `None`):
            The description of the generated commit.

    Returns:
        bool: Whatever the uploader's `upload()` call returns; True for
        success, False for failure.
    """
    # Look up the uploader class and build an instance in one step.
    hf_hub_client = get_upload_registry()["hf_hub"]()
    # login() must run before upload(); the order is preserved from the original flow.
    hf_hub_client.login()
    return hf_hub_client.upload(
        repo_id=repo_id,
        model_path=model_path,
        repo_type=repo_type,
        private=private,
        commit_message=commit_message,
        commit_description=commit_description,
    )

def save_config(self, save_path: str) -> None:
"""Save config to specified location.

Expand Down
6 changes: 6 additions & 0 deletions ludwig/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def __init__(self):
init_config Initialize a user config from a dataset and targets
render_config Renders the fully populated config with all defaults set
check_install Runs a quick training run on synthetic data to verify installation status
upload Push trained model artifacts to a registry (e.g., HuggingFace Hub)
""",
)
parser.add_argument("command", help="Subcommand to run")
Expand Down Expand Up @@ -179,6 +180,11 @@ def datasets(self):

datasets.cli(sys.argv[2:])

def upload(self):
    """Run the `upload` subcommand by delegating to `ludwig.upload.cli`."""
    # Imported inside the method so the module is only loaded when this
    # subcommand is actually invoked.
    from ludwig import upload as upload_module

    # Forward everything after the first two argv entries to the subcommand parser.
    subcommand_args = sys.argv[2:]
    upload_module.cli(subcommand_args)


def main():
ludwig.contrib.preload(sys.argv)
Expand Down
129 changes: 129 additions & 0 deletions ludwig/upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import argparse
import logging
import sys
from typing import Optional

from ludwig.utils.print_utils import get_logging_level_registry
from ludwig.utils.upload_utils import HuggingFaceHub

logger = logging.getLogger(__name__)


def get_upload_registry():
    """Return the mapping from upload service name to the class implementing it.

    A new dict is built on every call. The only registered service is
    `"hf_hub"`, which maps to `HuggingFaceHub`.
    """
    registry = dict(hf_hub=HuggingFaceHub)
    return registry


def upload_cli(
    service: str,
    repo_id: str,
    model_path: str,
    repo_type: str = "model",
    private: bool = False,
    commit_message: str = "Upload trained [Ludwig](https://ludwig.ai/latest/) model weights",
    commit_description: Optional[str] = None,
    **kwargs,
) -> None:
    """Log in to a hosted model service and upload trained model artifacts to a repo on it.

    Args:
        service (`str`):
            Name of the hosted model service to push the trained artifacts to.
            Must be a key of the upload registry; currently this is only `hf_hub`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        model_path (`str`):
            The path of the saved model. This is the top level directory where
            the models weights as well as other associated training artifacts
            are saved.
        repo_type (`str`, *optional*, defaults to `"model"`):
            Set to `"dataset"` or `"space"` if uploading to a dataset or
            space, `"model"` if uploading to a model.
        private (`bool`, *optional*, defaults to `False`):
            Whether the model repo should be private.
        commit_message (`str`, *optional*):
            The summary / title / first line of the generated commit. Defaults to
            `"Upload trained [Ludwig](https://ludwig.ai/latest/) model weights"`.
        commit_description (`str`, *optional*, defaults to `None`):
            The description of the generated commit.
        **kwargs:
            Accepted and ignored. This lets the parsed command-line namespace,
            which also carries entries such as `logging_level`, be passed
            straight through with `**vars(args)`.

    Raises:
        ValueError: If `service` is not a registered upload service.
    """
    registry = get_upload_registry()
    try:
        model_service = registry[service]
    except KeyError:
        # The previous `.get(service, "hf_hub")` fell back to the *string*
        # "hf_hub", which then failed with "'str' object is not callable".
        # Fail early with a message that names the supported services instead.
        raise ValueError(
            f"Unsupported upload service {service!r}. Supported services: {sorted(registry)}"
        ) from None

    hub = model_service()
    hub.login()
    hub.upload(
        repo_id=repo_id,
        model_path=model_path,
        repo_type=repo_type,
        private=private,
        commit_message=commit_message,
        commit_description=commit_description,
    )


def _parse_bool_flag(value: str) -> bool:
    """Convert a command-line token such as `True`/`false`/`1`/`no` to a bool."""
    normalized = value.strip().lower()
    if normalized in {"true", "t", "yes", "y", "1"}:
        return True
    if normalized in {"false", "f", "no", "n", "0"}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value (True or False), got {value!r}")


def cli(sys_argv):
    """Parse `ludwig upload` command-line arguments and run the upload.

    Args:
        sys_argv: The argument strings to parse, without the program name
            (and without the `upload` subcommand when invoked through the
            main `ludwig` entry point).

    Side effects:
        Sets the level of the `"ludwig"` logger from `--logging_level`,
        rebinds the module-level `logger` to `"ludwig.upload"`, and calls
        `upload_cli` with every parsed argument as a keyword argument.
    """
    parser = argparse.ArgumentParser(
        description="This script pushes a trained model to a hosted model repository service",
        prog="ludwig upload",
        usage="%(prog)s [options]",
    )

    # ---------------
    # Required parameters
    # ---------------
    # NOTE: `default` has no effect on a required positional; kept unchanged.
    parser.add_argument(
        "service",
        help="Name of the model repository service.",
        default="hf_hub",
        choices=["hf_hub"],
    )

    parser.add_argument(
        "-r",
        "--repo_id",
        help="Name of the repo. This will be created if it doesn't exist. Format: username/repo_name",
        required=True,
    )

    parser.add_argument("-m", "--model_path", help="Path of the trained model on disk", required=True)

    # ---------------
    # Optional parameters
    # ---------------
    # argparse hands option values over as strings, so the previous
    # `choices=[True, False]` (booleans, no `type`) rejected every value and
    # the repo could never be made private from the command line. Parse the
    # string explicitly; a bare `-p` (no value) means True.
    parser.add_argument(
        "-p",
        "--private",
        help="Make the repo private. Use as a bare flag or pass True/False.",
        nargs="?",
        const=True,
        default=False,
        type=_parse_bool_flag,
        metavar="{True,False}",
    )

    parser.add_argument(
        "-t", "--repo_type", help="Type of repo", default="model", choices=["model", "space", "dataset"]
    )

    parser.add_argument(
        "-c",
        "--commit_message",
        help="The summary / title / first line of the generated commit.",
        default="Upload trained [Ludwig](https://ludwig.ai/latest/) model weights",
    )

    parser.add_argument("-d", "--commit_description", help="The description of the generated commit", default=None)

    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="The level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )

    args = parser.parse_args(sys_argv)

    # Translate the level name into the value the logging module expects.
    args.logging_level = get_logging_level_registry()[args.logging_level]
    logging.getLogger("ludwig").setLevel(args.logging_level)
    global logger
    logger = logging.getLogger("ludwig.upload")

    # `logging_level` is passed along too; upload_cli absorbs it via **kwargs.
    upload_cli(**vars(args))


if __name__ == "__main__":
    cli(sys.argv[1:])
Loading
Loading