diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 3be3b31..cb5a016 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -20,15 +20,15 @@ jobs: uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v1 + uses: actions/setup-python@v4 with: python-version: 3.11 - name: Install Python dependencies - run: pip install black pylint torchserve==0.8.2 torch==2.0.1 transformers==4.33.0 -r llm/requirements.txt + run: pip install pytest black pylint torchserve==0.8.2 torch==2.0.1 transformers==4.33.0 -r llm/requirements.txt - - name: Run linters - uses: wearerequired/lint-action@v2 - with: - black: true - pylint: true \ No newline at end of file + - name: Run pylint + run: pylint ./llm + + - name: Run black + run: black ./llm --check \ No newline at end of file diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 0000000..583b5cd --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,32 @@ +name: Python Test with LLM + +on: + push: + branches: + - main # Change this to your main branch + pull_request: + branches: + - main + +jobs: + test: + name: Test with LLM + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.11 + + - name: Install dependencies + run: pip install pytest -r requirements.txt + working-directory: ./llm + + - name: Run tests + run: python3 -m pytest tests -v + working-directory: ./llm + diff --git a/README.md b/README.md index 1a87635..67d3900 100644 --- a/README.md +++ b/README.md @@ -113,7 +113,8 @@ bash $WORK_DIR/llm/run.sh -n llama2_7b -d data/summarize -g 1 -e llm-deploy -f ' set HOST and PORT ``` export INGRESS_HOST=$(kubectl get po -l istio=ingressgateway -n istio-system -o jsonpath='{.items[0].status.hostIP}') -export INGRESS_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="http2")].port}') + +export INGRESS_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}') ``` set Service Host Name @@ -145,8 +146,37 @@ curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http: If keep alive flag was set in the bash script, then you can run the following command to stop the server and clean up temporary files -python3 $WORK_DIR/llm/utils/cleanup.py --deploy_name +python3 $WORK_DIR/llm/cleanup.py --deploy_name + +``` +python3 $WORK_DIR/llm/cleanup.py --deploy_name llm-deploy +``` + +## Custom Model Support + +We provide the capability to generate a MAR file with custom models and start an inference server using Kubeflow serving.
+
+### Generate MAR file for custom model
+To generate the MAR file, run the following:
```
-python3 $WORK_DIR/llm/utils/cleanup.py --deploy_name llm-deploy
+python3 download.py --no_download [--repo_version <repo_version> --handler <handler_path>] --model_name <model_name> --model_path <model_path> --output <nfs_mount_path>
+```
+- no_download: Set this flag to skip downloading the model files; it must be set for custom models
+- model_name: Name of the custom model; this name must not be present in model_config
+- repo_version: Any model version, defaults to "1.0" (optional)
+- model_path: Absolute path of the custom model files (should be non-empty)
+- output: Mount path to your NFS server to be used in the kube PV, where config.properties and the model archive file will be stored
+- handler: Path to a custom handler, defaults to llm/handler.py (optional)
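For reference, the PR's new pytest suite drives this same custom-model flow programmatically rather than via the CLI; a minimal sketch along those lines is shown below. The model name and NFS paths are hypothetical placeholders, not values taken from this change.

```python
# Hypothetical sketch: invoke the custom-model flow the way llm/tests/test_download.py
# does, by building an argparse.Namespace and handing it to download.run_script.
import argparse

import download  # llm/download.py from this PR

args = argparse.Namespace(
    model_name="my_custom_model",            # assumed: not a key in model_config.json
    model_path="/mnt/nfs/my_custom_model",   # assumed: non-empty folder of model files
    output="/mnt/nfs",                       # NFS mount backing the kube PV
    no_download=False,                       # store_false flag: False means skip download (custom model)
    repo_version="",                         # falls back to "1.0" for custom models
    handler_path="",                         # falls back to llm/handler.py
    hf_token=None,
    debug=False,
)
# Produces <output>/my_custom_model/model-store/my_custom_model.mar
# plus config/config.properties under the same <output>/my_custom_model folder.
success = download.run_script(args)
```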
+ +### Start Torchserve and run inference for custom model +Run the following command for starting Kubeflow serving and running inference on the given input with a custom MAR file: ``` +bash run.sh -n -g -f -m -e [OPTIONAL -d ] +``` +- n: Name of custom model, this name must not be in model_config +- d: Absolute path of input data folder (Optional) +- g: Number of gpus to be used to execute (Set 0 to use cpu) +- f: NFS server address with share path information +- m: Mount path to your nfs server to be used in the kube PV where model files and model archive file be stored +- e: Name of the deployment metadata + diff --git a/llm/cleanup.py b/llm/cleanup.py index 2f50ec9..1d72691 100644 --- a/llm/cleanup.py +++ b/llm/cleanup.py @@ -8,7 +8,7 @@ from kserve import KServeClient -def kubernetes(deploy_name): +def kubernetes(deploy_name: str) -> None: """ This function cleans up various Kubernetes resources, including deleting the deployment, persistent volume claims (PVCs), and diff --git a/llm/download.py b/llm/download.py index 7c87f13..a103b74 100644 --- a/llm/download.py +++ b/llm/download.py @@ -7,26 +7,26 @@ import json import sys import re -import dataclasses from collections import Counter -from huggingface_hub import snapshot_download, HfApi -from huggingface_hub.utils import ( - RepositoryNotFoundError, - RevisionNotFoundError, -) +from typing import List +from huggingface_hub import snapshot_download import utils.marsgen as mg +import utils.hf_utils as hf +import utils.tsutils as ts +from utils.generate_data_model import GenerateDataModel from utils.system_utils import ( check_if_path_exists, create_folder_if_not_exists, delete_directory, copy_file, get_all_files_in_directory, + check_if_folder_empty, ) CONFIG_DIR = "config" CONFIG_FILE = "config.properties" MODEL_STORE_DIR = "model-store" -MODEL_FILES_LOCATION = "download" +HANDLER = "handler.py" MODEL_CONFIG_PATH = os.path.join(os.path.dirname(__file__), "model_config.json") FILE_EXTENSIONS_TO_IGNORE = [ ".safetensors", @@ -39,7 +39,7 @@ ] -def get_ignore_pattern_list(extension_list): +def get_ignore_pattern_list(extension_list: List[str]) -> List[str]: """ This function takes a list of file extensions and returns a list of patterns that can be used to filter out files with these extensions. @@ -51,7 +51,7 @@ def get_ignore_pattern_list(extension_list): return ["*" + pattern for pattern in extension_list] -def compare_lists(list1, list2): +def compare_lists(list1: List[str], list2: List[str]) -> bool: """ This function checks if two lists are equal by comparing their contents, regardless of the order. @@ -65,7 +65,9 @@ def compare_lists(list1, list2): return Counter(list1) == Counter(list2) -def filter_files_by_extension(filenames, extensions_to_remove): +def filter_files_by_extension( + filenames: List[str], extensions_to_remove: List[str] +) -> List[str]: """ This function takes a list of filenames and a list of extensions to remove. It returns a new list of filenames @@ -85,120 +87,22 @@ def filter_files_by_extension(filenames, extensions_to_remove): return filtered_filenames -@dataclasses.dataclass -class MarUtils: - """ - A class for representing information about a Model Archive (MAR). - - Attributes: - mar_output (str): The path to the MAR output directory. - model_path (str): The path to the model directory. - handler_path (str): The path to the model handler script. 
- """ - - mar_output = str() - model_path = str() - handler_path = str() - - -@dataclasses.dataclass -class RepoInfo: - """ - A class for specifying details related to a model repository in HuggingFace. - - Attributes: - repo_id (str): The identifier of the model repository. - repo_version (str): The version of the model in the repository. - hf_token (str): The Hugging Face token for authentication. - """ - - repo_id = str() - repo_version = str() - hf_token = str() - - -@dataclasses.dataclass -class DownloadDataModel: - """ - A class representing a model download configuration for data retrieval. - - Attributes: - - model_name (str): The name of the model to be downloaded. - - download_model (bool): A boolean indicating whether to download - the model (True) or not (False). - - model_path (str): The path where the downloaded model will be stored locally. - - output (str): Mount path to the nfs server to be used in the kube - PV where model files and model archive file be stored. - - mar_output (str): The output directory specifically for the Model Archive (MAR) files. - - repository_info (dict): Dictionary that will contain the unique identifier - or name of the model repository, the version of the model - within the repository and the path to the model's handler - script for custom processing - - hf_token (str): The Hugging Face API token for authentication when - accessing models from the Hugging Face Model Hub. - - debug (bool): A boolean indicating whether to enable debugging mode for - the download process (True) or not (False). - """ - - model_name = str() - download_model = bool() - output = str() - mar_utils = MarUtils() - repo_info = RepoInfo() - debug = bool() - - -def set_values(params): - """ - Set values for the DownloadDataModel object based on the command-line arguments. - Args: - params: An argparse.Namespace object containing command-line arguments. - Returns: - DownloadDataModel: An instance of the DownloadDataModel - class with values set based on the arguments. - """ - dl_model = DownloadDataModel() - - dl_model.model_name = params.model_name - dl_model.download_model = params.no_download - dl_model.output = params.output - - dl_model.mar_utils.handler_path = params.handler_path - - dl_model.repo_info.repo_version = params.repo_version - dl_model.repo_info.hf_token = params.hf_token - - dl_model.debug = params.debug - read_config_for_download(dl_model) - check_if_path_exists(dl_model.output, "output", is_dir=True) - - dl_model.mar_utils.model_path = os.path.join( - dl_model.output, - dl_model.model_name, - dl_model.repo_info.repo_version, - MODEL_FILES_LOCATION, - ) - dl_model.mar_utils.mar_output = os.path.join( - dl_model.output, - dl_model.model_name, - dl_model.repo_info.repo_version, - MODEL_STORE_DIR, - ) - return dl_model - - -def set_config(dl_model): +def set_config(gen_model: GenerateDataModel) -> None: """ This function creates a configuration file for the downloaded model and sets certain parameters. Args: - dl_model (DownloadDataModel): An instance of the DownloadDataModel + gen_model (GenerateDataModel): An instance of the GenerateDataModel class with relevant information. 
Returns: None """ - model_spec_path = os.path.join( - dl_model.output, dl_model.model_name, dl_model.repo_info.repo_version - ) + if gen_model.is_custom: + model_spec_path = os.path.join(gen_model.output, gen_model.model_name) + else: + model_spec_path = os.path.join( + gen_model.output, gen_model.model_name, gen_model.repo_info.repo_version + ) + config_folder_path = os.path.join(model_spec_path, CONFIG_DIR) create_folder_if_not_exists(config_folder_path) @@ -207,149 +111,145 @@ class with relevant information. copy_file(source_config_file, config_file_path) check_if_path_exists(config_file_path, "Config") - mar_filename = f"{dl_model.model_name}.mar" + mar_filename = f"{gen_model.model_name}.mar" check_if_path_exists( os.path.join(model_spec_path, MODEL_STORE_DIR, mar_filename), "Model store" ) # Check if mar file exists + ( + initial_workers, + batch_size, + max_batch_delay, + response_timeout, + ) = ts.get_params_for_registration(gen_model.model_name) + config_info = [ "\ninstall_py_dep_per_model=true\n", "model_store=/mnt/models/model-store\n", f'model_snapshot={{"name":"startup.cfg","modelCount":1,' - f'"models":{{"{dl_model.model_name}":{{' - f'"1.0":{{"defaultVersion":true,"marName":"{dl_model.model_name}.mar","minWorkers":1,' - f'"maxWorkers":1,"batchSize":1,"maxBatchDelay":500,"responseTimeout":60}}}}}}}}', + f'"models":{{"{gen_model.model_name}":{{' + f'"{gen_model.repo_info.repo_version}":{{"defaultVersion":true,' + f'"marName":"{gen_model.model_name}.mar",' + f'"minWorkers":{initial_workers or 1},' + f'"maxWorkers":{initial_workers or 1},' + f'"batchSize":{batch_size or 1},"maxBatchDelay":{max_batch_delay or 500},' + f'"responseTimeout":{response_timeout or 2000}}}}}}}}}', ] with open(config_file_path, "a", encoding="utf-8") as config_file: config_file.writelines(config_info) -def check_if_model_files_exist(dl_model): +def check_if_model_files_exist(gen_model: GenerateDataModel) -> bool: """ This function compares the list of files in the downloaded model directory with the list of files in the HuggingFace repository. It takes into account any files to ignore based on predefined extensions. Args: - dl_model (DownloadDataModel): An instance of the DownloadDataModel + gen_model (GenerateDataModel): An instance of the GenerateDataModel class with relevant information. Returns: bool: True if the downloaded model files match the expected repository files, False otherwise. """ - extra_files_list = get_all_files_in_directory(dl_model.mar_utils.model_path) - hf_api = HfApi() - repo_files = hf_api.list_repo_files( - repo_id=dl_model.repo_info.repo_id, - revision=dl_model.repo_info.repo_version, - token=dl_model.repo_info.hf_token, - ) + extra_files_list = get_all_files_in_directory(gen_model.mar_utils.model_path) + repo_files = hf.get_repo_files_list(gen_model) repo_files = filter_files_by_extension(repo_files, FILE_EXTENSIONS_TO_IGNORE) return compare_lists(extra_files_list, repo_files) -def check_if_mar_file_exist(dl_model): +def check_if_mar_file_exist(gen_model: GenerateDataModel) -> bool: """ This function checks if the Model Archive (MAR) file for the downloaded model exists in the specified output directory. Args: - dl_model (DownloadDataModel): An instance of the DownloadDataModel + gen_model (GenerateDataModel): An instance of the GenerateDataModel class with relevant information. Returns: bool: True if the MAR file exists, False otherwise. 
""" - mar_filename = f"{dl_model.model_name}.mar" - if os.path.exists(dl_model.mar_utils.mar_output): - directory_contents = os.listdir(dl_model.mar_utils.mar_output) + mar_filename = f"{gen_model.model_name}.mar" + if os.path.exists(gen_model.mar_utils.mar_output): + directory_contents = os.listdir(gen_model.mar_utils.mar_output) return len(directory_contents) == 1 and directory_contents[0] == mar_filename return False -def read_config_for_download(dl_model): +def read_config_for_download(gen_model: GenerateDataModel) -> None: """ This function reads repo id, version and handler name - from model_config.json and sets values for the DownloadDataModel object. + from model_config.json and sets values for the GenerateDataModel object. Args: - dl_model (DownloadDataModel): An instance of the DownloadDataModel + gen_model (GenerateDataModel): An instance of the GenerateDataModel class with relevant information. Returns: None Raises: - sys.exit(1): If model name,repo_id or repo_version is not valid, the - function will terminate the program with an exit code of 1. + sys.exit(1): If model name is not valid, the function will + terminate the program with an exit code of 1. """ check_if_path_exists(MODEL_CONFIG_PATH) with open(MODEL_CONFIG_PATH, encoding="utf-8") as f: models = json.loads(f.read()) - if dl_model.model_name in models: - try: - # validation to check if model repo commit id is valid or not - model = models[dl_model.model_name] - dl_model.repo_info.repo_id = model["repo_id"] - if ( - dl_model.repo_info.repo_id.startswith("meta-llama") - and dl_model.repo_info.hf_token is None - ): - # Make sure there is HF hub token for LLAMA(2) - print( - ( - "HuggingFace Hub token is required for llama download. " - "Please specify it using --hf_token=. Refer " - "https://huggingface.co/docs/hub/security-tokens" - ) - ) - sys.exit(1) - - if dl_model.repo_info.repo_version == "": - dl_model.repo_info.repo_version = model["repo_version"] - - hf_api = HfApi() - commit_info = hf_api.list_repo_commits( - repo_id=dl_model.repo_info.repo_id, - revision=dl_model.repo_info.repo_version, - token=dl_model.repo_info.hf_token, + if gen_model.model_name in models: + # validation to check if model repo commit id is valid or not + model = models[gen_model.model_name] + gen_model.repo_info.repo_id = model["repo_id"] + + hf.hf_token_check(gen_model.repo_info.repo_id, gen_model.repo_info.hf_token) + + if gen_model.repo_info.repo_version == "": + gen_model.repo_info.repo_version = model["repo_version"] + + gen_model.repo_info.repo_version = hf.get_repo_commit_id( + repo_id=gen_model.repo_info.repo_id, + revision=gen_model.repo_info.repo_version, + token=gen_model.repo_info.hf_token, + ) + + if ( + gen_model.mar_utils.handler_path == "" + and model.get("handler") + and model["handler"] + ): + gen_model.mar_utils.handler_path = os.path.join( + os.path.dirname(__file__), + model["handler"], ) - dl_model.repo_info.repo_version = commit_info[0].commit_id - - if ( - dl_model.mar_utils.handler_path == "" - and model.get("handler") - and model["handler"] - ): - dl_model.mar_utils.handler_path = os.path.join( - os.path.dirname(__file__), - model["handler"], - ) - check_if_path_exists(dl_model.mar_utils.handler_path, "Handler") - except (RepositoryNotFoundError, RevisionNotFoundError, KeyError): - print( - ( - "## Error: Please check either repo_id, repo_version " - "or huggingface token is not correct" - ) + check_if_path_exists(gen_model.mar_utils.handler_path, "Handler") + elif not gen_model.download_model: + 
gen_model.is_custom = True + if gen_model.mar_utils.handler_path == "": + gen_model.mar_utils.handler_path = os.path.join( + os.path.dirname(__file__), + HANDLER, ) - sys.exit(1) + if gen_model.repo_info.repo_version == "": + gen_model.repo_info.repo_version = "1.0" else: print( "## Please check your model name, it should be one of the following : " ) print(list(models.keys())) + print( + "If it is a custom model and you have model files include no_download flag : " + ) sys.exit(1) -def run_download(dl_model): +def run_download(gen_model: GenerateDataModel) -> GenerateDataModel: """ This function checks if model files are present at given model path otherwise downloads the given version's model files at that path. Args: - dl_model (DownloadDataModel): An instance of the DownloadDataModel + gen_model (GenerateDataModel): An instance of the GenerateDataModel class with relevant information. Returns: - DownloadDataModel: An instance of the DownloadDataModel class. + GenerateDataModel: An instance of the GenerateDataModel class. """ - if os.path.exists(dl_model.mar_utils.model_path) and check_if_model_files_exist( - dl_model + if os.path.exists(gen_model.mar_utils.model_path) and check_if_model_files_exist( + gen_model ): print( ( @@ -357,68 +257,85 @@ class with relevant information. " repo version are already present\n" ) ) - return dl_model + return gen_model print("## Starting model files download\n") - delete_directory(dl_model.mar_utils.model_path) - create_folder_if_not_exists(dl_model.mar_utils.model_path) + delete_directory(gen_model.mar_utils.model_path) + create_folder_if_not_exists(gen_model.mar_utils.model_path) snapshot_download( - repo_id=dl_model.repo_info.repo_id, - revision=dl_model.repo_info.repo_version, - local_dir=dl_model.mar_utils.model_path, + repo_id=gen_model.repo_info.repo_id, + revision=gen_model.repo_info.repo_version, + local_dir=gen_model.mar_utils.model_path, local_dir_use_symlinks=False, - token=dl_model.repo_info.hf_token, + token=gen_model.repo_info.hf_token, ignore_patterns=get_ignore_pattern_list(FILE_EXTENSIONS_TO_IGNORE), ) print("## Successfully downloaded model_files\n") - return dl_model + return gen_model -def create_mar(dl_model): +def create_mar(gen_model: GenerateDataModel) -> None: """ This function checks if the Model Archive (MAR) file for the downloaded model exists in the specified model path otherwise generates the MAR file. Args: - dl_model (DownloadDataModel): An instance of the DownloadDataModel + gen_model (GenerateDataModel): An instance of the GenerateDataModel class with relevant information. 
Returns: None """ - if check_if_mar_file_exist(dl_model): + if check_if_mar_file_exist(gen_model): print("## Skipping generation of model archive file as it is present\n") else: - check_if_path_exists(dl_model.mar_utils.model_path, "model_path", is_dir=True) - if not check_if_model_files_exist(dl_model): - # checking if local model files are same the repository files - print("## Model files do not match HuggingFace repository Files") - sys.exit(1) + check_if_path_exists(gen_model.mar_utils.model_path, "model_path", is_dir=True) + if not gen_model.is_custom: + if not check_if_model_files_exist(gen_model): + # checking if local model files are same the repository files + print("## Model files do not match HuggingFace repository Files") + sys.exit(1) + else: + if check_if_folder_empty(gen_model.mar_utils.model_path): + print( + f"\n##Error: {gen_model.model_name} model files not found" + f" in the provided path: {gen_model.mar_utils.model_path}" + ) + sys.exit(1) + else: + print( + f"\n## Generating MAR file for custom model files: {gen_model.model_name}" + ) - create_folder_if_not_exists(dl_model.mar_utils.mar_output) + create_folder_if_not_exists(gen_model.mar_utils.mar_output) mg.generate_mars( - dl_model=dl_model, + gen_model=gen_model, model_config=MODEL_CONFIG_PATH, - model_store_dir=dl_model.mar_utils.mar_output, - debug=dl_model.debug, + model_store_dir=gen_model.mar_utils.mar_output, + debug=gen_model.debug, ) -def run_script(params): +def run_script(params: argparse.Namespace) -> bool: """ Execute a series of steps to run a script for downloading model files, creating model archive file, and config file for a LLM. Args: - params (dict): A dictionary containing the necessary parameters - and configurations for the script. + params (argparse.Namespace): A Namespace object containing the necessary + parameters and configurations for the script. Returns: None """ - dl_model = set_values(params) - if dl_model.download_model: - dl_model = run_download(dl_model) + gen_model = GenerateDataModel(params) + read_config_for_download(gen_model) + check_if_path_exists(gen_model.output, "output", is_dir=True) + gen_model.set_model_files_and_mar(params) - create_mar(dl_model) - set_config(dl_model) + if gen_model.download_model: + gen_model = run_download(gen_model) + + create_mar(gen_model) + set_config(gen_model) + return True if __name__ == "__main__": @@ -434,6 +351,13 @@ def run_script(params): parser.add_argument( "--no_download", action="store_false", help="flag to not download" ) + parser.add_argument( + "--model_path", + type=str, + default="", + metavar="mf", + help="absolute path of the model files", + ) parser.add_argument( "--output", type=str, diff --git a/llm/handler.py b/llm/handler.py index 9226329..339ed98 100644 --- a/llm/handler.py +++ b/llm/handler.py @@ -1,15 +1,18 @@ """ Serves as a handler for a LLM, allowing it to be used in an inference service. -The handler provides functions to preprocess input data, make predictions using the model, +The handler provides functions to preprocess input data, make predictions using the model, and post-process the output for a particular use case. 
""" import logging import os from abc import ABC from collections import defaultdict +from typing import List, Dict import torch import transformers from ts.torch_handler.base_handler import BaseHandler +import ts + logger = logging.getLogger(__name__) logger.info("Transformers version %s", transformers.__version__) @@ -55,8 +58,9 @@ class LLMHandler(BaseHandler, ABC): data (list(str)): A list containing the output text of model generation. Returns: list(str): A list containing model's generated output. - _batch_to_json(data: list(str)) -> list(str): - _to_json(data: (str)) -> json(str): + _batch_to_json(data: list(str)) -> list(str): Convertes list of string output + in json format + _to_json(data: (str)) -> json(str): Convertes string output in json format get_env_value(str) -> float: This method reads the inputed environment variable and converts it to float and returns it. This is used for reading model generation parameters. @@ -77,9 +81,10 @@ def __init__(self): self.tokenizer = None self.map_location = None self.device = None + self.device_map = None self.model = None - def initialize(self, context): + def initialize(self, context: ts.context.Context): """ This method loads the Hugging Face model and tokenizer based on the provided model name and model files present in MAR file. @@ -93,16 +98,13 @@ def initialize(self, context): and torch.cuda.is_available() and properties.get("gpu_id") is not None ): - self.map_location = "cuda" - self.device = torch.device( - self.map_location + ":" + str(properties.get("gpu_id")) - ) + self.device = torch.device("cuda") + self.device_map = "auto" else: - self.map_location = "cpu" - self.device = torch.device(self.map_location) + self.device = self.device_map = torch.device("cpu") self.tokenizer = transformers.AutoTokenizer.from_pretrained( - model_dir, local_files_only=True, device_map=self.device + model_dir, local_files_only=True, device_map=self.device_map ) self.tokenizer.pad_token = ( self.tokenizer.eos_token @@ -113,7 +115,7 @@ def initialize(self, context): self.model = transformers.AutoModelForCausalLM.from_pretrained( model_dir, torch_dtype=torch.bfloat16, # Load model weights in bfloat16 - device_map=self.device, + device_map=self.device_map, local_files_only=True, trust_remote_code=True, ) @@ -122,11 +124,11 @@ def initialize(self, context): self.initialized = True logger.info("Initialized TorchServe Server!") - def preprocess(self, data): + def preprocess(self, data: List) -> torch.Tensor: """ This method tookenizes input text using the associated tokenizer. Args: - text (str): The input text to be tokenized. + data (str): The input text to be tokenized. Returns: Tensor: Tokenized input data """ @@ -165,7 +167,7 @@ def preprocess(self, data): return encoded_input - def inference(self, data, *args, **kwargs): + def inference(self, data: torch.Tensor, *args, **kwargs) -> List: """ This method reads the generation parameters set as environment vairables and uses the preprocessed tokens and generation parameters to generate a @@ -204,13 +206,13 @@ def inference(self, data, *args, **kwargs): logger.info("Generated text is: %s", ", ".join(map(str, inference))) return inference - def postprocess(self, data): + def postprocess(self, data: List) -> List: """ This method returns the list of generated text recieved. Args: data (list(str)): A list containing the output text of model generation. Returns: - list(str): A list containing model's generated output. + list: A list containing model's generated output. 
""" response_list = [] idx = 0 @@ -242,7 +244,7 @@ def postprocess(self, data): return response_list - def _batch_to_json(self, data): + def _batch_to_json(self, data: List) -> List: """ Splits batch output to json objects """ @@ -251,7 +253,7 @@ def _batch_to_json(self, data): output.append(self._to_json(item)) return output - def _to_json(self, data): + def _to_json(self, data: str) -> Dict: """ Constructs JSON object from data """ @@ -266,12 +268,11 @@ def _to_json(self, data): output_data["data"] = [data] return output_data - def get_env_value(self, env_var): + def get_env_value(self, env_var: str) -> float: """ This function gets the value of an environment variable as a float. Args: env_var (str): The name of the environment variable to retrieve. - Returns: float or None: The float value of the environment variable if conversion is successful, or None otherwise. diff --git a/llm/kubeflow_inference_run.py b/llm/kubeflow_inference_run.py index ef7525c..83ce6cf 100644 --- a/llm/kubeflow_inference_run.py +++ b/llm/kubeflow_inference_run.py @@ -7,7 +7,9 @@ import sys import os import time +from typing import List, Dict import utils.tsutils as ts +import utils.hf_utils as hf from utils.system_utils import check_if_path_exists, get_all_files_in_directory from kubernetes import client, config from kserve import ( @@ -27,7 +29,7 @@ kubMemUnits = ["Ei", "Pi", "Ti", "Gi", "Mi", "Ki"] -def get_inputs_from_folder(input_path): +def get_inputs_from_folder(input_path: str) -> List: """ Retrieve a list of file paths of inputs for inference within a specified directory. @@ -47,27 +49,47 @@ def get_inputs_from_folder(input_path): ) -def check_if_valid_version(model_name, repo_version, mount_path): +def check_if_valid_version(model_info: Dict, mount_path: str) -> str: """ Check if the model files for a specific commit ID exist in the given directory. Args: - model_name (str): The name of the model. - repo_version (str): The commit ID of HuggingFace repo of the model. + model_info(dict): A dictionary containing the following: + model_name (str): The name of the model. + repo_version (str): The commit ID of HuggingFace repo of the model. + repo_id (str): The repo id. + hf_token (str): Your HuggingFace token (Required only for LLAMA2 model). mount_path (str): The local file server mount path where the model files are expected. Raises: sys.exit(1): If the model files do not exist, the function will terminate the program with an exit code of 1. """ - model_spec_path = os.path.join(mount_path, model_name, repo_version) + hf.hf_token_check(model_info["repo_id"], model_info["hf_token"]) + model_info["repo_version"] = hf.get_repo_commit_id( + repo_id=model_info["repo_id"], + revision=model_info["repo_version"], + token=model_info["hf_token"], + ) + print(model_info) + model_spec_path = os.path.join( + mount_path, model_info["model_name"], model_info["repo_version"] + ) if not os.path.exists(model_spec_path): print( - f"## ERROR: The {model_name} model files for given commit ID are not downloaded" + f"## ERROR: The {model_info['model_name']} model files for given commit ID " + "are not downloaded" ) sys.exit(1) + return model_info["repo_version"] -def create_pv(core_api, deploy_name, storage, nfs_server, nfs_path): +def create_pv( + core_api: client.CoreV1Api, + deploy_name: str, + storage: str, + nfs_server: str, + nfs_path: str, +) -> None: """ This function creates a Persistent Volume using the provided parameters. 
@@ -94,7 +116,7 @@ def create_pv(core_api, deploy_name, storage, nfs_server, nfs_path): core_api.create_persistent_volume(body=persistent_volume) -def create_pvc(core_api, deploy_name, storage): +def create_pvc(core_api: client.CoreV1Api, deploy_name: str, storage: str) -> None: """ This function creates a Persistent Volume Claim using the provided parameters. @@ -122,8 +144,8 @@ def create_pvc(core_api, deploy_name, storage): def create_isvc( - deploy_name, model_name, repo_version, deployment_resources, model_params -): + deploy_name: str, model_info: Dict, deployment_resources: Dict, model_params: Dict +) -> None: """ This function creates a inference service a PyTorch Predictor that expose LLMs as RESTful APIs, allowing to make predictions using the deployed LLMs. @@ -132,14 +154,18 @@ def create_isvc( Args: deploy_name (str): Name of the inference service. - model_name (str): The name of the model whose inference service is to be created. - repo_version (str): The commit ID of HuggingFace repo of the model. + model_info(dict): A dictionary containing the following: + model_name (str): The name of the model whose inference service is to be created. + repo_version (str): The commit ID of HuggingFace repo of the model. deployment_resources (dict): Dictionary containing number of cpus, memory and number of gpus to be used for the inference service. model_params(dict): Dictionary containing parameters of the model """ - storageuri = f"pvc://{deploy_name}/{model_name}/{repo_version}" + if model_params["is_custom"]: + storageuri = f"pvc://{deploy_name}/{model_info['model_name']}" + else: + storageuri = f"pvc://{deploy_name}/{model_info['model_name']}/{model_info['repo_version']}" default_model_spec = V1beta1InferenceServiceSpec( predictor=V1beta1PredictorSpec( pytorch=V1beta1TorchServeSpec( @@ -190,8 +216,12 @@ def create_isvc( def execute_inference_on_inputs( - model_inputs, model_name, deploy_name, retry=False, debug=False -): + model_inputs: List, + model_name: str, + deploy_name: str, + retry: bool = False, + debug: bool = False, +) -> bool: """ This function sends a list of model inputs to a specified model deployment using the KServe and gets the inference results. It is used to run inference on a LLM deployed in @@ -254,9 +284,9 @@ def execute_inference_on_inputs( return is_success -def health_check(model_name, deploy_name, model_timeout): +def health_check(model_name: str, deploy_name: str, model_timeout: int) -> None: """ - This function checks if the model is resistered or not. + This function checks if the model is registered or not. Args: model_name (str): The name of the model that is being registered. 
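To make the storage-URI branching added to `create_isvc()` earlier in this file concrete, here is a small standalone sketch; the model name, commit ID, and deployment name reuse values that appear elsewhere in this PR.

```python
# Mirrors the branching added in create_isvc(): custom models live at the root of
# the PVC under their name, Hugging Face models keep a per-commit sub-directory.
deploy_name = "llm-deploy"
model_info = {
    "model_name": "gpt2",
    "repo_version": "11c5a3d5811f50298f278a704980280950aedb10",
}
model_params = {"is_custom": False}

if model_params["is_custom"]:
    storageuri = f"pvc://{deploy_name}/{model_info['model_name']}"
else:
    storageuri = (
        f"pvc://{deploy_name}/{model_info['model_name']}/{model_info['repo_version']}"
    )
print(storageuri)  # pvc://llm-deploy/gpt2/11c5a3d5811f50298f278a704980280950aedb10
```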
@@ -290,7 +320,7 @@ def health_check(model_name, deploy_name, model_timeout): sys.exit(1) -def execute(params): +def execute(params: argparse.Namespace) -> None: """ This function orchestrates the deployment and inference of a LLM in a Kubernetes cluster by performing tasks such as creating @@ -310,11 +340,16 @@ def execute(params): deployment_resources["gpus"] = params.gpu deployment_resources["cpus"] = params.cpu deployment_resources["memory"] = params.mem + nfs_server, nfs_path = params.nfs.split(":") deploy_name = params.deploy_name - model_name = params.model_name + + model_info = {} + model_info["model_name"] = params.model_name + model_info["repo_version"] = params.repo_version + model_info["hf_token"] = params.hf_token + input_path = params.data - repo_version = params.repo_version mount_path = params.mount_path model_timeout = params.model_timeout @@ -327,28 +362,30 @@ def execute(params): storage = "100Gi" - model_params = ts.get_model_params(model_name) - if not repo_version: - repo_version = model_params["repo_version"] + model_params = ts.get_model_params(model_info["model_name"]) - check_if_valid_version(model_name, repo_version, mount_path) + if not model_params["is_custom"]: + if not model_info["repo_version"]: + model_info["repo_version"] = model_params["repo_version"] + model_info["repo_id"] = model_params["repo_id"] + model_info["repo_version"] = check_if_valid_version(model_info, mount_path) config.load_kube_config() core_api = client.CoreV1Api() create_pv(core_api, deploy_name, storage, nfs_server, nfs_path) create_pvc(core_api, deploy_name, storage) - create_isvc( - deploy_name, model_name, repo_version, deployment_resources, model_params - ) + create_isvc(deploy_name, model_info, deployment_resources, model_params) print("wait for model registration to complete, will take some time") - health_check(model_name, deploy_name, model_timeout) + health_check(model_info["model_name"], deploy_name, model_timeout) if input_path: check_if_path_exists(input_path, "Input", is_dir=True) model_inputs = get_inputs_from_folder(input_path) - execute_inference_on_inputs(model_inputs, model_name, deploy_name, debug=True) + execute_inference_on_inputs( + model_inputs, model_info["model_name"], deploy_name, debug=True + ) if __name__ == "__main__": @@ -379,6 +416,12 @@ def execute(params): parser.add_argument( "--mount_path", type=str, help="local path to the nfs mount location" ) + parser.add_argument( + "--hf_token", + type=str, + default=None, + help="HuggingFace Hub token to download LLAMA(2) models", + ) # Parse the command-line arguments args = parser.parse_args() execute(args) diff --git a/llm/model_config.json b/llm/model_config.json index 841b4fe..9a2c9ba 100644 --- a/llm/model_config.json +++ b/llm/model_config.json @@ -1,9 +1,8 @@ { "mpt_7b": { "repo_id": "mosaicml/mpt-7b", - "repo_version": "0b57768f52b7775563f7cc78c4724e407b39593b", + "repo_version": "00f72b21dd089db80c7fb50cb606996751500f6d", "handler": "handler.py", - "requirements_file": "model_requirements.txt", "model_params":{ "temperature" : 0.7, "repetition_penalty" : 1.75, @@ -21,7 +20,6 @@ "repo_id": "tiiuae/falcon-7b", "repo_version": "898df1396f35e447d5fe44e0a3ccaaaa69f30d36", "handler": "handler.py", - "requirements_file": "model_requirements.txt", "model_params":{ "temperature" : 0.5, "repetition_penalty" : 2.0, @@ -39,7 +37,6 @@ "repo_id": "meta-llama/Llama-2-7b-hf", "repo_version": "6fdf2e60f86ff2481f2241aaee459f85b5b0bbb9", "handler": "handler.py", - "requirements_file": "model_requirements.txt", 
"model_params":{ "temperature" : 0.6, "repetition_penalty" : 1.75, @@ -57,10 +54,9 @@ "repo_id": "microsoft/phi-1_5", "repo_version": "b6a7e2fe15c21f5847279f23e280cc5a0e7049ef", "handler": "handler.py", - "requirements_file": "model_requirements.txt", "registration_params":{ - "batch_size" : 16, - "max_batch_delay" : 2000, + "batch_size" : 1, + "max_batch_delay" : 200, "response_timeout" : 2000 }, "gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"] @@ -69,7 +65,6 @@ "repo_id": "gpt2", "repo_version": "11c5a3d5811f50298f278a704980280950aedb10", "handler": "handler.py", - "requirements_file": "model_requirements.txt", "gpu_type": ["A100-PCIE-40GB","A100-PCIE-80GB"] } } \ No newline at end of file diff --git a/llm/run.sh b/llm/run.sh index 7ec24ec..4578316 100644 --- a/llm/run.sh +++ b/llm/run.sh @@ -8,7 +8,7 @@ MODEL_TIMEOUT_IN_SEC="1200" function helpFunction() { - echo "Usage: $0 -n -g -f -m -e [OPTIONAL -d -v ]" + echo "Usage: $0 -n -g -f -m -e [OPTIONAL -d -v -t ]" echo -e "\t-f NFS server address with share path information" echo -e "\t-m Absolute path to the NFS local mount location" echo -e "\t-e Name of the deployment metadata" @@ -17,6 +17,7 @@ function helpFunction() echo -e "\t-d Absolute path to the inputs folder that contains data to be predicted." echo -e "\t-g Number of gpus to be used to execute. Set 0 to use cpu" echo -e "\t-v Commit id of the HuggingFace Repo." + echo -e "\t-t Your HuggingFace token (Required only for LLAMA2 model)." exit 1 # Exit script after printing help } @@ -62,12 +63,16 @@ function inference_exec_kubernetes() exec_cmd+=" --repo_version $repo_version" fi + if [ ! -z $hf_token ] ; then + exec_cmd+=" --hf_token $hf_token" + fi + echo "Running the Inference script"; $exec_cmd } # Entry Point -while getopts ":n:v:m:d:g:f:e:" opt; +while getopts ":n:v:m:t:d:g:f:e:" opt; do case "$opt" in n ) model_name="$OPTARG" ;; @@ -77,6 +82,7 @@ do e ) deploy_name="$OPTARG" ;; v ) repo_version="$OPTARG" ;; m ) mount_path="$OPTARG" ;; + t ) hf_token="$OPTARG" ;; ? ) helpFunction ;; # Print helpFunction in case parameter is non-existent esac done diff --git a/llm/tests/__init__.py b/llm/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/llm/tests/test_download.py b/llm/tests/test_download.py new file mode 100644 index 0000000..f45be9c --- /dev/null +++ b/llm/tests/test_download.py @@ -0,0 +1,297 @@ +""" +This module runs pytest tests for download.py file. + +Attributes: + MODEL_NAME: Name of the model used for testing (gpt2). + OUTPUT: absolute path of the output location in local nfs mount. + MODEL_CONFIG_PATH: Path to model_config.json file. + MODEL_TEMP_CONFIG_PATH: Path to backup model_config.json file. +""" +import os +import argparse +import json +import shutil +import pytest +import download +from utils.system_utils import copy_file + +MODEL_NAME = "gpt2" +OUTPUT = os.path.dirname(__file__) +MODEL_CONFIG_PATH = os.path.join( + os.path.dirname(os.path.dirname(__file__)), "model_config.json" +) +MODEL_TEMP_CONFIG_PATH = os.path.join( + os.path.dirname(os.path.dirname(__file__)), "temp_model_config.json" +) + + +def set_args( + model_name="", + output="", + model_path="", + repo_version="", + handler_path="", +): + """ + This function sets the arguments to run download.py. + + Args: + repo_version (str, optional): Repository version of the model. Defaults to "". + model_path (str, optional): Path to model files. Defaults to MODEL_PATH. + output (str, optional): absolute path of the output location in local nfs mount. 
+ handler_path (str, optional): Path to Torchserve handler. Defaults to "". + + Returns: + argparse.Namespace: Parameters to run download.py. + """ + args = argparse.Namespace() + args.model_name = model_name + args.output = output + args.model_path = model_path + args.no_download = True + args.repo_version = repo_version + args.handler_path = handler_path + args.hf_token = None + args.debug = False + return args + + +def empty_folder(folder_path): + """ + This function empties a folder. + """ + try: + # Check if the folder exists + if os.path.exists(folder_path): + # Remove all files in the folder + for filename in os.listdir(folder_path): + file_path = os.path.join(folder_path, filename) + if os.path.isfile(file_path): + os.remove(file_path) + + # Remove all subfolders in the folder + for subfolder in os.listdir(folder_path): + subfolder_path = os.path.join(folder_path, subfolder) + if os.path.isdir(subfolder_path): + shutil.rmtree(subfolder_path) + else: + print(f"Folder '{folder_path}' does not exist.") + except (FileNotFoundError, IsADirectoryError) as e: + print(f"An error occurred: {str(e)}") + + +def custom_model_setup(): + """ + This function is used to setup custom model case. + Returns: + model_path: absolute path of model files + """ + copy_file(MODEL_CONFIG_PATH, MODEL_TEMP_CONFIG_PATH) + with open(MODEL_CONFIG_PATH, "w", encoding="utf-8") as file: + json.dump({}, file) + + repo_version = "11c5a3d5811f50298f278a704980280950aedb10" + model_path = os.path.join( + os.path.dirname(__file__), MODEL_NAME, repo_version, "download" + ) + return model_path + + +def custom_model_restore(): + """ + This function restores the 'model_config.json' file. + """ + os.remove(MODEL_CONFIG_PATH) + copy_file(MODEL_TEMP_CONFIG_PATH, MODEL_CONFIG_PATH) + os.remove(MODEL_TEMP_CONFIG_PATH) + + +def test_empty_model_name_failure(): + """ + This function tests empty model name. + Expected result: Failure. + """ + args = set_args(output=OUTPUT) + try: + download.run_script(args) + except SystemExit as e: + assert e.code == 1 + else: + assert False + + +def test_empty_output_failure(): + """ + This function tests empty output path. + Expected result: Failure. + """ + args = set_args(MODEL_NAME) + try: + download.run_script(args) + except SystemExit as e: + assert e.code == 1 + else: + assert False + + +def test_wrong_model_name_failure(): + """ + This function tests wrong model name. + Expected result: Failure. + """ + args = set_args("wrong_model_name", OUTPUT) + try: + download.run_script(args) + except SystemExit as e: + assert e.code == 1 + else: + assert False + + +def test_wrong_output_failure(): + """ + This function tests wrong output path. + Expected result: Failure. + """ + args = set_args(MODEL_NAME, "/wrong_output_path") + try: + download.run_script(args) + except SystemExit as e: + assert e.code == 1 + else: + assert False + + +def test_wrong_repo_version_failure(): + """ + This function tests wrong repo version. + Expected result: Failure. + """ + args = set_args(MODEL_NAME, OUTPUT, repo_version="wrong_repo_version") + try: + download.run_script(args) + except SystemExit as e: + assert e.code == 1 + else: + assert False + + +def test_wrong_handler_path_failure(): + """ + This function tests wrong handler path. + Expected result: Failure. 
+ """ + args = set_args(MODEL_NAME, OUTPUT, handler_path="/wrong_path.py") + try: + download.run_script(args) + except SystemExit as e: + assert e.code == 1 + else: + assert False + + +def test_no_model_files_failure(): + """ + This function tests skip download without model files. + Expected result: Failure. + """ + args = set_args(MODEL_NAME, OUTPUT) + args.no_download = False + try: + download.run_script(args) + except SystemExit as e: + assert e.code == 1 + else: + assert False + + +def test_default_success(): + """ + This function tests the default GPT2 model. + Expected result: Success. + """ + args = set_args(MODEL_NAME, OUTPUT) + try: + result = download.run_script(args) + except SystemExit: + assert False + else: + assert result is True + + +def test_vaild_repo_version_success(): + """ + This function tests a valid repo version. + Expected result: Success. + """ + args = set_args( + MODEL_NAME, OUTPUT, repo_version="e7da7f221d5bf496a48136c0cd264e630fe9fcc8" + ) + try: + result = download.run_script(args) + except SystemExit: + assert False + else: + assert result is True + + +def test_short_repo_version_success(): + """ + This function tests a valid short repo version + and if model and MAR file already exists. + Expected result: Success. + """ + args = set_args(MODEL_NAME, OUTPUT, repo_version="11c5a3d581") + try: + result = download.run_script(args) + except SystemExit: + assert False + else: + assert result is True + + +def test_custom_model_success(): + """ + This function tests the custom model case. + This is done by clearing the 'model_config.json' and + generating the 'GPT2' MAR file. + Expected result: Success. + """ + model_path = custom_model_setup() + args = set_args(MODEL_NAME, OUTPUT, model_path) + args.no_download = False + try: + result = download.run_script(args) + custom_model_restore() + except SystemExit: + assert False + else: + assert result is True + + +def test_custom_model_no_model_files_failure(): + """ + This function tests the custom model case when + model files folder is empty. + Expected result: Failure. + """ + model_path = custom_model_setup() + model_store_path = os.path.join( + os.path.dirname(__file__), MODEL_NAME, "model-store" + ) + empty_folder(model_path) + empty_folder(model_store_path) + args = set_args(MODEL_NAME, OUTPUT, model_path) + args.no_download = False + try: + download.run_script(args) + custom_model_restore() + except SystemExit as e: + assert e.code == 1 + else: + assert False + + +# Run the tests +if __name__ == "__main__": + pytest.main(["-v", __file__]) diff --git a/llm/utils/generate_data_model.py b/llm/utils/generate_data_model.py new file mode 100644 index 0000000..7ec5fc3 --- /dev/null +++ b/llm/utils/generate_data_model.py @@ -0,0 +1,138 @@ +""" +This module stores the dataclasses GenerateDataModel, MarUtils, RepoInfo, +function set_values that sets the GenerateDataModel attributes and +function set_model_files_and_mar that sets model path and mar output values. +""" +import os +import dataclasses +import argparse + +MODEL_STORE_DIR = "model-store" +MODEL_FILES_LOCATION = "download" + + +@dataclasses.dataclass +class MarUtils: + """ + A class for representing information about a Model Archive (MAR). + + Attributes: + mar_output (str): The path to the MAR output directory. + model_path (str): The path to the model directory. + handler_path (str): The path to the model handler script. 
+ """ + + mar_output = str() + model_path = str() + handler_path = str() + + +@dataclasses.dataclass +class RepoInfo: + """ + A class for specifying details related to a model repository in HuggingFace. + + Attributes: + repo_id (str): The identifier of the model repository. + repo_version (str): The version of the model in the repository. + hf_token (str): The Hugging Face token for authentication. + """ + + repo_id = str() + repo_version = str() + hf_token = str() + + +class GenerateDataModel: + """ + A class representing a model download configuration for data retrieval. + + Attributes: + - model_name (str): The name of the model to be downloaded. + - download_model (bool): A boolean indicating whether to download + the model (True) or not (False). + - model_path (str): The path where the downloaded model will be stored locally. + - output (str): Mount path to the nfs server to be used in the kube + PV where model files and model archive file be stored. + - mar_output (str): The output directory specifically for the Model Archive (MAR) files. + - repository_info (dict): Dictionary that will contain the unique identifier + or name of the model repository, the version of the model + within the repository and the path to the model's handler + script for custom processing + - hf_token (str): The Hugging Face API token for authentication when + accessing models from the Hugging Face Model Hub. + - debug (bool): A boolean indicating whether to enable debugging mode for + the download process (True) or not (False). + """ + + model_name = str() + download_model = bool() + output = str() + mar_utils = MarUtils() + repo_info = RepoInfo() + is_custom = bool() + debug = bool() + + def __init__(self, params: argparse.Namespace) -> None: + """ + This is the init function that calls set_values method. + + Args: + params (argparse.Namespace): An argparse.Namespace object + containing command-line arguments. + """ + self.set_values(params) + + def set_values(self, params: argparse.Namespace) -> None: + """ + Set values for the GenerateDataModel object based on the command-line arguments. + Args: + params (argparse.Namespace): An argparse.Namespace object + containing command-line arguments. + Returns: + GenerateDataModel: An instance of the GenerateDataModel + class with values set based on the arguments. + """ + self.model_name = params.model_name + self.download_model = params.no_download + self.output = params.output + self.is_custom = False + + self.mar_utils.handler_path = params.handler_path + + self.repo_info.repo_version = params.repo_version + self.repo_info.hf_token = params.hf_token + + self.debug = params.debug + + def set_model_files_and_mar(self, params: argparse.Namespace) -> None: + """ + This function sets model path and mar output values. + Args: + gen_model (GenerateDataModel): An instance of the GenerateDataModel + class with relevant information. + params (argparse.Namespace): An argparse.Namespace object + containing command-line arguments. 
+ Returns: + None + """ + if self.is_custom: + self.mar_utils.model_path = params.model_path + self.mar_utils.mar_output = os.path.join( + self.output, + self.model_name, + MODEL_STORE_DIR, + ) + else: + self.mar_utils.model_path = os.path.join( + self.output, + self.model_name, + self.repo_info.repo_version, + MODEL_FILES_LOCATION, + ) + self.mar_utils.mar_output = os.path.join( + self.output, + self.model_name, + self.repo_info.repo_version, + MODEL_STORE_DIR, + ) diff --git a/llm/utils/hf_utils.py b/llm/utils/hf_utils.py new file mode 100644 index 0000000..a2f6c97 --- /dev/null +++ b/llm/utils/hf_utils.py @@ -0,0 +1,104 @@ +""" +Utility functions for using HuggingFace Api +""" +import sys +from typing import List +from huggingface_hub import HfApi +from huggingface_hub.utils import ( + RepositoryNotFoundError, + RevisionNotFoundError, +) +from utils.generate_data_model import GenerateDataModel + + +def get_repo_files_list(gen_model: GenerateDataModel) -> List[str]: + """ + This function returns a list of all files in the HuggingFace repo of + the model. + Args: + gen_model (GenerateDataModel): An instance of the GenerateDataModel + class with relevant information. + Returns: + repo_files (list): all files in the HuggingFace repo of + the model + Raises: + sys.exit(1): If repo_id, repo_version or huggingface token + is not valid, the function will terminate + the program with an exit code of 1. + """ + try: + hf_api = HfApi() + repo_files = hf_api.list_repo_files( + repo_id=gen_model.repo_info.repo_id, + revision=gen_model.repo_info.repo_version, + token=gen_model.repo_info.hf_token, + ) + return repo_files + except (RepositoryNotFoundError, RevisionNotFoundError, KeyError): + print( + ( + "## Error: Please check either repo_id, repo_version " + "or huggingface token is not correct" + ) + ) + sys.exit(1) + + +def get_repo_commit_id(repo_id: str, revision: str, token: str) -> str: + """ + This function returns the whole Commit ID from HuggingFace repo of + the model. + Args: + revision (str): The commit ID of HuggingFace repo of the model. + repo_id (str): The repo id. + token (str): Your HuggingFace token (Required only for LLAMA2 model). + Returns: + commit id (str): The whole commit ID of HuggingFace repo of + the model. + Raises: + sys.exit(1): If repo_id, repo_version or huggingface token + is not valid, the function will terminate + the program with an exit code of 1. + """ + try: + hf_api = HfApi() + commit_info = hf_api.list_repo_commits( + repo_id=repo_id, + revision=revision, + token=token, + ) + return commit_info[0].commit_id + except (RepositoryNotFoundError, RevisionNotFoundError): + print( + ( + "## Error: Please check either repo_id, repo_version " + "or huggingface token is not correct" + ) + ) + sys.exit(1) + + +def hf_token_check(repo_id: str, token: str) -> None: + """ + This function checks if HuggingFace token is provided for + Llama 2 model + Args: + repo_id (str): The repo id. + token (str): Your HuggingFace token (Required only for LLAMA2 model). + Returns: + None + Raises: + sys.exit(1): If if HuggingFace token is not provided for + Llama 2 model, the function will terminate + the program with an exit code of 1. + """ + if repo_id.startswith("meta-llama") and token is None: + # Make sure there is HF hub token for LLAMA(2) + print( + ( + "HuggingFace Hub token is required for llama download. " + "Please specify it using --hf_token=. 
Refer " + "https://huggingface.co/docs/hub/security-tokens" + ) + ) + sys.exit(1) diff --git a/llm/utils/marsgen.py b/llm/utils/marsgen.py index 0b52e77..3ebd1bc 100644 --- a/llm/utils/marsgen.py +++ b/llm/utils/marsgen.py @@ -5,16 +5,25 @@ import os import sys import subprocess +from typing import Dict from utils.system_utils import check_if_path_exists, get_all_files_in_directory +from utils.generate_data_model import GenerateDataModel +REQUIREMENTS_FILE = "model_requirements.txt" -def generate_mars(dl_model, model_config, model_store_dir, debug=False): + +def generate_mars( + gen_model: GenerateDataModel, + model_config: str, + model_store_dir: str, + debug: bool = False, +) -> None: """ This function generates a Model Archive (MAR) file for a specified LLM using the provided model configuration, model store directory, and optional debug information. Args: - dl_model (LLM): An object representing the LLM to generate the MAR for. + gen_model (LLM): An object representing the LLM to generate the MAR for. model_config (str): The path to the JSON model configuration file. model_store_dir (str): The directory where the MAR file will be stored. debug (bool, optional): A flag indicating whether to @@ -36,34 +45,29 @@ def generate_mars(dl_model, model_config, model_store_dir, debug=False): with open(model_config, encoding="utf-8") as f: models = json.loads(f.read()) - if dl_model.model_name not in models: - print( - "## Please check your model name, it should be one of the following : " - ) - print(list(models.keys())) - sys.exit(1) - - model = models[dl_model.model_name] + if gen_model.model_name not in models: + if not gen_model.is_custom: + print( + "## Please check your model name, it should be one of the following : " + ) + print(list(models.keys())) + sys.exit(1) extra_files = None - extra_files_list = get_all_files_in_directory(dl_model.mar_utils.model_path) + extra_files_list = get_all_files_in_directory(gen_model.mar_utils.model_path) extra_files_list = [ - os.path.join(dl_model.mar_utils.model_path, file) + os.path.join(gen_model.mar_utils.model_path, file) for file in extra_files_list ] extra_files = ",".join(extra_files_list) - requirements_file = None - if model.get("requirements_file") and model["requirements_file"]: - requirements_file = os.path.join( - os.path.dirname(__file__), model["requirements_file"] - ) - check_if_path_exists(requirements_file) + requirements_file = os.path.join(os.path.dirname(__file__), REQUIREMENTS_FILE) + check_if_path_exists(requirements_file) model_archiver_args = { - "model_name": dl_model.model_name, - "version": dl_model.repo_info.repo_version, - "handler": dl_model.mar_utils.handler_path, + "model_name": gen_model.model_name, + "version": gen_model.repo_info.repo_version, + "handler": gen_model.mar_utils.handler_path, "extra_files": extra_files, "requirements_file": requirements_file, "export_path": model_store_dir, @@ -78,24 +82,24 @@ def generate_mars(dl_model, model_config, model_store_dir, debug=False): try: subprocess.check_call(cmd, shell=True) - marfile = f"{dl_model.model_name}.mar" + marfile = f"{gen_model.model_name}.mar" print(f"## {marfile} is generated.\n") except subprocess.CalledProcessError as exc: print("## Creation failed !\n") if debug: - print(f"## {model['model_name']} creation failed !, error: {exc}\n") + print(f"## {gen_model.model_name} creation failed !, error: {exc}\n") sys.exit(1) os.chdir(cwd) def model_archiver_command_builder( - model_archiver_args, - runtime=None, - archive_format=None, - force=True, - debug=False, 
-): + model_archiver_args: Dict[str, str], + runtime: int = None, + archive_format: str = None, + force: bool = True, + debug: bool = False, +) -> str: """ This function generates the torch model archiver command that will be used for generating model archive file diff --git a/llm/utils/system_utils.py b/llm/utils/system_utils.py index f265819..63224ae 100644 --- a/llm/utils/system_utils.py +++ b/llm/utils/system_utils.py @@ -5,9 +5,10 @@ import sys import shutil from pathlib import Path +from typing import List -def check_if_path_exists(filepath, err="", is_dir=False): +def check_if_path_exists(filepath: str, err: str = "", is_dir: bool = False) -> None: """ This function checks if a given path exists. Args: @@ -22,7 +23,7 @@ def check_if_path_exists(filepath, err="", is_dir=False): sys.exit(1) -def create_folder_if_not_exists(path): +def create_folder_if_not_exists(path: str) -> None: """ This function creates a folder in the specified path if it does not already exist @@ -35,7 +36,7 @@ def create_folder_if_not_exists(path): print(f"The new directory is created! - {path}") -def delete_directory(directory_path): +def delete_directory(directory_path: str) -> None: """ This function deletes directory in the specified path @@ -55,7 +56,7 @@ def delete_directory(directory_path): print(f"Error deleting contents from '{directory_path}': {str(e)}") -def copy_file(source_file, destination_file): +def copy_file(source_file: str, destination_file: str) -> None: """ This function copies a file from source file path to destination file path @@ -73,12 +74,12 @@ def copy_file(source_file, destination_file): print(f"## Error: {e}") -def get_all_files_in_directory(directory): +def get_all_files_in_directory(directory: str) -> List[str]: """ This function provides a list of file names in a directory and its sub-directories Args: - path (str): The path to the directory. + directory (str): The path to the directory. Returns: ["file.txt", "sub-directory/file.txt"] """ @@ -90,3 +91,15 @@ def get_all_files_in_directory(directory): if file.is_file() ] return output + + +def check_if_folder_empty(path: str) -> bool: + """ + This function checks if a directory is empty. + Args: + path (str): Path of the dirctory to check. + Returns: + bool: True if directory is empty, False otherwise. + """ + dir_items = os.listdir(path) + return len(dir_items) == 0 diff --git a/llm/utils/tsutils.py b/llm/utils/tsutils.py index 822f205..3a2ad98 100644 --- a/llm/utils/tsutils.py +++ b/llm/utils/tsutils.py @@ -2,15 +2,19 @@ Utility functions for running inference and getiing model parameters """ import os -import sys import json import collections +from typing import Dict, Tuple import requests def run_inference_v2( - model_name, file_name, connection_params, timeout=120, debug=False -): + model_name: str, + file_name: str, + connection_params: Dict, + timeout: int = 120, + debug: bool = False, +) -> requests.Response: """ This function runs inference using a specified model via a REST API Args: @@ -51,7 +55,7 @@ def run_inference_v2( return response -def get_model_params(model_name): +def get_model_params(model_name: str) -> Dict[str, str]: """ This function reads the model parameters from model_config.json and stores then in a dict. 
    Args:
@@ -69,12 +73,14 @@
        model_config = json.loads(file.read())
        if model_name in model_config:
            model_params["repo_version"] = model_config[model_name]["repo_version"]
+            model_params["repo_id"] = model_config[model_name]["repo_id"]
+            model_params["is_custom"] = False
        else:
+            model_params["is_custom"] = True
            print(
-                "## Please check your model name, it should be one of the following : "
+                f"## Using custom MAR file : {model_name}.mar\n\n"
+                "WARNING: This model has not been validated on any GPUs\n\n"
            )
-            print(list(model_config.keys()))
-            sys.exit(1)

    if model_name in model_config and "model_params" in model_config[model_name]:
        param_config = model_config[model_name]["model_params"]
@@ -91,3 +97,35 @@
        model_params["max_new_tokens"] = param_config["max_new_tokens"]

    return model_params
+
+
+def get_params_for_registration(model_name: str) -> Tuple[str, str, str, str]:
+    """
+    This function reads the registration parameters from model_config.json and returns them.
+    The registration parameters are:
+    initial_workers, batch_size, max_batch_delay, response_timeout.
+    Args:
+        model_name (str): Name of the model.
+    Returns:
+        tuple: initial_workers, batch_size, max_batch_delay, response_timeout
+    """
+    dirpath = os.path.dirname(__file__)
+    initial_workers = batch_size = max_batch_delay = response_timeout = None
+    with open(
+        os.path.join(dirpath, "../model_config.json"), encoding="UTF-8"
+    ) as config:
+        model_config = json.loads(config.read())
+        if (
+            model_name in model_config
+            and "registration_params" in model_config[model_name]
+        ):
+            param_config = model_config[model_name]["registration_params"]
+            if "initial_workers" in param_config:
+                initial_workers = param_config["initial_workers"]
+            if "batch_size" in param_config:
+                batch_size = param_config["batch_size"]
+            if "max_batch_delay" in param_config:
+                max_batch_delay = param_config["max_batch_delay"]
+            if "response_timeout" in param_config:
+                response_timeout = param_config["response_timeout"]
+    return initial_workers, batch_size, max_batch_delay, response_timeout
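As an end-to-end illustration of the new registration plumbing, the snippet below (run from the `llm/` directory) shows how the tuple returned here feeds the `model_snapshot` entry that `set_config()` in download.py appends to config.properties, including the fallback defaults used when a model such as `gpt2` defines no `registration_params`:

```python
# Run from the llm/ directory so utils/ resolves; gpt2 has no registration_params,
# so every value comes back as None and set_config()'s fallbacks apply.
import utils.tsutils as ts

initial_workers, batch_size, max_batch_delay, response_timeout = ts.get_params_for_registration("gpt2")

snapshot_fragment = (
    f'"minWorkers":{initial_workers or 1},'
    f'"maxWorkers":{initial_workers or 1},'
    f'"batchSize":{batch_size or 1},'
    f'"maxBatchDelay":{max_batch_delay or 500},'
    f'"responseTimeout":{response_timeout or 2000}'
)
print(snapshot_fragment)
# "minWorkers":1,"maxWorkers":1,"batchSize":1,"maxBatchDelay":500,"responseTimeout":2000
```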