From d2a8028b3b992d1765bb32dad8d93931892df47c Mon Sep 17 00:00:00 2001
From: pm3310
Date: Sat, 17 Feb 2024 12:59:36 +0000
Subject: [PATCH] LLM Platform

---
 sagify/commands/llm.py     | 255 +++++++++++++++++++++++++++++++++++--
 sagify/llm_gateway/main.py |   4 +-
 2 files changed, 246 insertions(+), 13 deletions(-)

diff --git a/sagify/commands/llm.py b/sagify/commands/llm.py
index 34cb237..9016cee 100644
--- a/sagify/commands/llm.py
+++ b/sagify/commands/llm.py
@@ -15,6 +15,94 @@
 click.disable_unicode_literals_warning = True
 
 
+_MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME = {
+    'llama-2-7b': ('meta-textgeneration-llama-2-7b-f', 'https://huggingface.co/meta-llama/Llama-2-7b'),
+    'llama-2-13b': ('meta-textgeneration-llama-2-13b-f', 'https://huggingface.co/meta-llama/Llama-2-13b'),
+    'llama-2-70b': ('meta-textgeneration-llama-2-70b-f', 'https://huggingface.co/meta-llama/Llama-2-70b'),
+}
+
+_VALID_INSTANCE_TYPES_PER_CHAT_COMPLETIONS_MODEL = {
+    'meta-textgeneration-llama-2-7b-f': [
+        ('ml.g5.2xlarge', 'https://instances.vantage.sh/aws/ec2/g5.2xlarge'),
+        ('ml.g5.4xlarge', 'https://instances.vantage.sh/aws/ec2/g5.4xlarge'),
+        ('ml.g5.12xlarge', 'https://instances.vantage.sh/aws/ec2/g5.12xlarge'),
+        ('ml.g5.24xlarge', 'https://instances.vantage.sh/aws/ec2/g5.24xlarge'),
+        ('ml.g5.48xlarge', 'https://instances.vantage.sh/aws/ec2/g5.48xlarge'),
+        ('ml.p3dn.24xlarge', 'https://instances.vantage.sh/aws/ec2/p3dn.24xlarge'),
+    ],
+    'meta-textgeneration-llama-2-13b-f': [
+        ('ml.g5.12xlarge', 'https://instances.vantage.sh/aws/ec2/g5.12xlarge'),
+        ('ml.g5.24xlarge', 'https://instances.vantage.sh/aws/ec2/g5.24xlarge'),
+        ('ml.g5.48xlarge', 'https://instances.vantage.sh/aws/ec2/g5.48xlarge'),
+    ],
+    'meta-textgeneration-llama-2-70b-f': [
+        ('ml.g5.48xlarge', 'https://instances.vantage.sh/aws/ec2/g5.48xlarge'),
+    ],
+}
+
+_MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME = {
+    'stabilityai-stable-diffusion-v2': (
+        'model-txt2img-stabilityai-stable-diffusion-v2',
+        'https://huggingface.co/stabilityai/stable-diffusion-2'
+    ),
+    'stabilityai-stable-diffusion-v2-1-base': (
+        'model-txt2img-stabilityai-stable-diffusion-v2-1-base',
+        'https://huggingface.co/stabilityai/stable-diffusion-2-1-base'
+    ),
+    'stabilityai-stable-diffusion-v2-fp16': (
+        'model-txt2img-stabilityai-stable-diffusion-v2-fp16',
+        'https://huggingface.co/stabilityai/stable-diffusion-2/tree/fp16'
+    )
+}
+
+_VALID_INSTANCE_TYPES_PER_IMAGE_CREATIONS_MODEL = {
+    'model-txt2img-stabilityai-stable-diffusion-v2': [
+        ('ml.p3.2xlarge', 'https://instances.vantage.sh/aws/ec2/p3.2xlarge'),
+        ('ml.g4dn.2xlarge', 'https://instances.vantage.sh/aws/ec2/g4dn.2xlarge'),
+        ('ml.g5.2xlarge', 'https://instances.vantage.sh/aws/ec2/g5.2xlarge'),
+    ],
+    'model-txt2img-stabilityai-stable-diffusion-v2-1-base': [
+        ('ml.p3.2xlarge', 'https://instances.vantage.sh/aws/ec2/p3.2xlarge'),
+        ('ml.g4dn.2xlarge', 'https://instances.vantage.sh/aws/ec2/g4dn.2xlarge'),
+        ('ml.g5.2xlarge', 'https://instances.vantage.sh/aws/ec2/g5.2xlarge'),
+    ],
+    'model-txt2img-stabilityai-stable-diffusion-v2-fp16': [
+        ('ml.p3.2xlarge', 'https://instances.vantage.sh/aws/ec2/p3.2xlarge'),
+        ('ml.g4dn.2xlarge', 'https://instances.vantage.sh/aws/ec2/g4dn.2xlarge'),
+        ('ml.g5.2xlarge', 'https://instances.vantage.sh/aws/ec2/g5.2xlarge'),
+    ],
+}
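+
+# User-facing embeddings model ids, mapped to the SageMaker JumpStart model name and
+# the corresponding Hugging Face model card (same layout as the chat and image mappings above).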
+_MAPPING_EMBEDDINGS_MODEL_ID_TO_MODEL_NAME = {
+    'bge-large-en': ('huggingface-sentencesimilarity-bge-large-en', 'https://huggingface.co/BAAI/bge-large-en'),
+    'bge-base-en': ('huggingface-sentencesimilarity-bge-base-en', 'https://huggingface.co/BAAI/bge-base-en'),
+    'gte-large': ('huggingface-sentencesimilarity-gte-large', 'https://huggingface.co/thenlper/gte-large'),
+    'gte-base': ('huggingface-sentencesimilarity-gte-base', 'https://huggingface.co/thenlper/gte-base'),
+    'e5-large-v2': ('huggingface-sentencesimilarity-e5-large-v2', 'https://huggingface.co/intfloat/e5-large-v2'),
+    'bge-small-en': ('huggingface-sentencesimilarity-bge-small-en', 'https://huggingface.co/BAAI/bge-small-en'),
+    'e5-base-v2': ('huggingface-sentencesimilarity-e5-base-v2', 'https://huggingface.co/intfloat/e5-base-v2'),
+    'multilingual-e5-large': ('huggingface-sentencesimilarity-multilingual-e5-large', 'https://huggingface.co/intfloat/multilingual-e5-large'),
+    'e5-large': ('huggingface-sentencesimilarity-e5-large', 'https://huggingface.co/intfloat/e5-large'),
+    'gte-small': ('huggingface-sentencesimilarity-gte-small', 'https://huggingface.co/thenlper/gte-small'),
+    'e5-base': ('huggingface-sentencesimilarity-e5-base', 'https://huggingface.co/intfloat/e5-base'),
+    'e5-small-v2': ('huggingface-sentencesimilarity-e5-small-v2', 'https://huggingface.co/intfloat/e5-small-v2'),
+    'multilingual-e5-base': ('huggingface-sentencesimilarity-multilingual-e5-base', 'https://huggingface.co/intfloat/multilingual-e5-base'),
+    'all-MiniLM-L6-v2': ('huggingface-sentencesimilarity-all-MiniLM-L6-v2', 'https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2'),
+}
+
+_VALID_EMBEDDINGS_INSTANCE_TYPES = [
+    ('ml.g5.2xlarge', 'https://instances.vantage.sh/aws/ec2/g5.2xlarge'),
+    ('ml.g5.4xlarge', 'https://instances.vantage.sh/aws/ec2/g5.4xlarge'),
+    ('ml.g5.12xlarge', 'https://instances.vantage.sh/aws/ec2/g5.12xlarge'),
+    ('ml.g5.24xlarge', 'https://instances.vantage.sh/aws/ec2/g5.24xlarge'),
+    ('ml.g5.48xlarge', 'https://instances.vantage.sh/aws/ec2/g5.48xlarge'),
+    ('ml.p3dn.24xlarge', 'https://instances.vantage.sh/aws/ec2/p3dn.24xlarge'),
+    ('ml.p3.2xlarge', 'https://instances.vantage.sh/aws/ec2/p3.2xlarge'),
+    ('ml.p3.8xlarge', 'https://instances.vantage.sh/aws/ec2/p3.8xlarge'),
+    ('ml.p3.16xlarge', 'https://instances.vantage.sh/aws/ec2/p3.16xlarge'),
+]
+
+
 @click.group()
 def llm():
     """
@@ -22,6 +110,91 @@
     """
     pass
 
+@llm.command()
+def platforms():
+    """
+    Command to list supported platforms for LLM models
+    """
+    logger.info("Platforms:")
+    logger.info(" - OpenAI: https://platform.openai.com/docs/overview")
+    logger.info(" - AWS Sagemaker: https://aws.amazon.com/sagemaker")
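+
+# `sagemaker_models` prints, for each selected model family, the supported model ids
+# together with the SageMaker instance types they can be deployed on; at least one
+# of the flags below must be passed (--all selects every family).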
+@llm.command()
+@click.option(
+    '--all',
+    is_flag=True,
+    show_default=True,
+    default=False,
+    help='Show all LLM models.'
+)
+@click.option(
+    '--chat-completions',
+    is_flag=True,
+    show_default=True,
+    default=False,
+    help='Show chat completions models.'
+)
+@click.option(
+    '--image-creations',
+    is_flag=True,
+    show_default=True,
+    default=False,
+    help='Show image creations models.'
+)
+@click.option(
+    '--embeddings',
+    is_flag=True,
+    show_default=True,
+    default=False,
+    help='Show embeddings models.'
+)
+def sagemaker_models(all, chat_completions, image_creations, embeddings):
+    """
+    Command to list available LLM models
+    """
+    logger.info(ASCII_LOGO)
+
+    if not any([all, chat_completions, image_creations, embeddings]):
+        logger.error("At least one of the flags --all, --chat-completions, --image-creations, --embeddings must be defined.")
+        sys.exit(-1)
+
+    if all:
+        chat_completions, image_creations, embeddings = True, True, True
+
+    logger.info("Available LLM models:\n")
+
+    if chat_completions:
+        logger.info("Chat Completions:")
+        for model_id, (model_name, model_url) in _MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME.items():
+            logger.info(" - Model: {}".format(model_id))
+            logger.info(" Model URL: {}".format(model_url))
+            logger.info(" Instance Types:")
+            for instance_type, instance_url in _VALID_INSTANCE_TYPES_PER_CHAT_COMPLETIONS_MODEL[model_name]:
+                logger.info(" - Instance Type: {}".format(instance_type))
+                logger.info(" Instance URL: {}".format(instance_url))
+        logger.info("\n")
+
+    if image_creations:
+        logger.info("Image Creations:")
+        for model_id, (model_name, model_url) in _MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME.items():
+            logger.info(" - Model: {}".format(model_id))
+            logger.info(" Model URL: {}".format(model_url))
+            logger.info(" Instance Types:")
+            for instance_type, instance_url in _VALID_INSTANCE_TYPES_PER_IMAGE_CREATIONS_MODEL[model_name]:
+                logger.info(" - Instance Type: {}".format(instance_type))
+                logger.info(" Instance URL: {}".format(instance_url))
+        logger.info("\n")
+
+    if embeddings:
+        logger.info("\nEmbeddings:")
+        for model_id, (model_name, model_url) in _MAPPING_EMBEDDINGS_MODEL_ID_TO_MODEL_NAME.items():
+            logger.info(" - Model: {}".format(model_id))
+            logger.info(" Model URL: {}".format(model_url))
+            logger.info(" Instance Types:")
+            for instance_type, instance_url in _VALID_EMBEDDINGS_INSTANCE_TYPES:
+                logger.info(" - Instance Type: {}".format(instance_type))
+                logger.info(" Instance URL: {}".format(instance_url))
+
 
 @llm.command()
 @click.option(
@@ -104,20 +277,17 @@ def start(
     # Default configuration
     default_config = {
         'chat_completions': {
-            'model': 'meta-textgeneration-llama-2-7b-f',
-            'model_version': '1.*',
+            'model': 'llama-2-7b',
             'instance_type': 'ml.g5.2xlarge',
             'num_instances': 1,
         },
         'image_creations': {
-            'model': 'model-txt2img-stabilityai-stable-diffusion-v2-1-base',
-            'model_version': '1.*',
+            'model': 'stabilityai-stable-diffusion-v2-1-base',
             'instance_type': 'ml.p3.2xlarge',
             'num_instances': 1,
         },
         'embeddings': {
-            'model': 'huggingface-sentencesimilarity-gte-small',
-            'model_version': '1.*',
+            'model': 'gte-small',
             'instance_type': 'ml.g5.2xlarge',
             'num_instances': 1,
         },
@@ -139,9 +309,27 @@
     }
 
     if chat_completions:
+        if default_config['chat_completions']['model'] not in _MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME:
+            raise ValueError(
+                "Invalid chat completions model id. Available model ids: {}".format(
+                    list(_MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME.keys())
+                )
+            )
+
+        if default_config['chat_completions']['instance_type'] not in [
+            instance_type for instance_type, _ in _VALID_INSTANCE_TYPES_PER_CHAT_COMPLETIONS_MODEL[
+                _MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME[default_config['chat_completions']['model']][0]
+            ]
+        ]:
+            raise ValueError(
+                "Invalid instance type for chat completions model. Available instance types: {}".format(
+                    _VALID_INSTANCE_TYPES_PER_CHAT_COMPLETIONS_MODEL[
+                        _MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME[default_config['chat_completions']['model']][0]
+                    ]
+                )
+            )
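+
+        # The user-facing model id is resolved to the underlying SageMaker JumpStart
+        # model id right before the endpoint is deployed.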
Available instance types: {}".format( + _VALID_INSTANCE_TYPES_PER_CHAT_COMPLETIONS_MODEL[ + _MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME[default_config['chat_completions']['model']][0] + ] + ) + ) + chat_endpoint_name, _ = api_cloud.foundation_model_deploy( - model_id=default_config['chat_completions']['model'], - model_version=default_config['chat_completions']['model_version'], + model_id=_MAPPING_CHAT_COMPLETIONS_MODEL_ID_TO_MODEL_NAME[default_config['chat_completions']['model']][0], + model_version='1.*', num_instances=default_config['chat_completions']['num_instances'], ec2_type=default_config['chat_completions']['instance_type'], aws_region=aws_region, @@ -155,9 +343,27 @@ def start( logger.info("Chat Completions Endpoint Name: {}".format(chat_endpoint_name)) if image_creations: + if default_config['image_creations']['model'] not in _MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME: + raise ValueError( + "Invalid image creations model id. Available model ids: {}".format( + list(_MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME.keys()) + ) + ) + + if default_config['image_creations']['instance_type'] not in _VALID_INSTANCE_TYPES_PER_IMAGE_CREATIONS_MODEL[ + _MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME[default_config['image_creations']['model']][0] + ]: + raise ValueError( + "Invalid instance type for image creations model. Available instance types: {}".format( + _VALID_INSTANCE_TYPES_PER_IMAGE_CREATIONS_MODEL[ + _MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME[default_config['image_creations']['model']][0] + ] + ) + ) + image_endpoint_name, _ = api_cloud.foundation_model_deploy( - model_id=default_config['image_creations']['model'], - model_version=default_config['image_creations']['model_version'], + model_id=_MAPPING_IMAGE_CREATION_MODEL_ID_TO_MODEL_NAME[default_config['image_creations']['model']][0], + model_version='1.*', num_instances=default_config['image_creations']['num_instances'], ec2_type=default_config['image_creations']['instance_type'], aws_region=aws_region, @@ -171,9 +377,23 @@ def start( logger.info("Image Creations Endpoint Name: {}".format(image_endpoint_name)) if embeddings: + if default_config['embeddings']['model'] not in _MAPPING_EMBEDDINGS_MODEL_ID_TO_MODEL_NAME: + raise ValueError( + "Invalid embeddings model id. Available model ids: {}".format( + list(_MAPPING_EMBEDDINGS_MODEL_ID_TO_MODEL_NAME.keys()) + ) + ) + + if default_config['embeddings']['instance_type'] not in _VALID_EMBEDDINGS_INSTANCE_TYPES: + raise ValueError( + "Invalid instance type for embeddings model. 
Available instance types: {}".format( + _VALID_EMBEDDINGS_INSTANCE_TYPES + ) + ) + embeddings_endpoint_name, _ = api_cloud.foundation_model_deploy( - model_id=default_config['embeddings']['model'], - model_version=default_config['embeddings']['model_version'], + model_id=_MAPPING_EMBEDDINGS_MODEL_ID_TO_MODEL_NAME[default_config['embeddings']['model']][0], + model_version='1.*', num_instances=default_config['embeddings']['num_instances'], ec2_type=default_config['embeddings']['instance_type'], aws_region=aws_region, @@ -289,6 +509,17 @@ def stop( logger.info("{}".format(e)) sys.exit(-1) +@llm.command() +def start_local_gateway(): + """ + Command to start local gateway + """ + logger.info(ASCII_LOGO) + logger.info("Starting local gateway...\n") + from sagify.llm_gateway.main import start_server + start_server() +llm.add_command(platforms) +llm.add_command(sagemaker_models) llm.add_command(start) llm.add_command(stop) diff --git a/sagify/llm_gateway/main.py b/sagify/llm_gateway/main.py index 31d4d96..19f993b 100644 --- a/sagify/llm_gateway/main.py +++ b/sagify/llm_gateway/main.py @@ -14,6 +14,8 @@ app.include_router(api_router) app.add_exception_handler(InternalServerError, internal_server_error_handler) +def start_server(): + uvicorn.run("sagify.llm_gateway.main:app", port=8080, host="0.0.0.0") if __name__ == "__main__": - uvicorn.run("sagify.llm_gateway.main:app", port=8080, host="0.0.0.0") + start_server()